In [1]:
import sys
import os
import time
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
from cuml.linear_model import LogisticRegression as cuMLLogisticRegression


print('NOVA_HOME is at', os.getenv('NOVA_HOME'))
sys.path.insert(1, os.getenv('NOVA_HOME'))
%load_ext autoreload
%autoreload 2

from utils import *
NOVA_HOME is at /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
In [2]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
import itertools
from cuml.linear_model import LogisticRegression as cuLogisticRegression
In [3]:
dataset_config = {
    "path_to_embeddings": "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen",
    "multiplexed": False,
    "config_fmt": "newNeuronsD8FigureConfig_UMAP1_B{batch}",
    "config_dir": "manuscript/manuscript_figures_data_config",
}
In [16]:
## Baseline
run_baseline_model(
    dataset_config= dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
)
2025-08-19 11:59:17 INFO: [load_embeddings] multiplex=False
2025-08-19 11:59:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 11:59:17 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 11:59:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 11:59:21 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 11:59:22 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 11:59:23 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 11:59:23 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 11:59:23 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 11:59:23 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 11:59:23 INFO: [load_embeddings] paths shape: (141079,)
2025-08-19 11:59:23 INFO: [load_embeddings] multiplex=False
2025-08-19 11:59:23 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 11:59:23 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 11:59:23 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 11:59:27 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 11:59:29 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 11:59:30 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 11:59:30 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 11:59:30 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 11:59:30 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 11:59:30 INFO: [load_embeddings] paths shape: (134336,)
2025-08-19 11:59:30 INFO: [load_embeddings] multiplex=False
2025-08-19 11:59:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 11:59:30 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 11:59:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 11:59:35 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 11:59:37 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 11:59:38 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 11:59:39 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 11:59:39 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 11:59:39 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 11:59:39 INFO: [load_embeddings] paths shape: (189079,)
2025-08-19 11:59:39 INFO: [load_embeddings] multiplex=False
2025-08-19 11:59:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 11:59:39 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 11:59:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 11:59:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 11:59:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 11:59:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 11:59:49 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 11:59:49 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 11:59:49 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 11:59:49 INFO: [load_embeddings] paths shape: (169304,)
2025-08-19 11:59:49 INFO: [load_embeddings] multiplex=False
2025-08-19 11:59:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 11:59:49 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 11:59:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 11:59:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 11:59:56 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 11:59:58 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 11:59:58 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 11:59:58 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 11:59:58 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 11:59:58 INFO: [load_embeddings] paths shape: (196652,)
2025-08-19 11:59:59 INFO: [load_embeddings] multiplex=False
2025-08-19 11:59:59 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 11:59:59 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 11:59:59 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:00:04 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:00:06 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:00:08 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:00:08 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 12:00:08 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 12:00:08 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:00:08 INFO: [load_embeddings] paths shape: (196119,)
=== Batch 1 ===
Train: (830450, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
HNRNPA1_WT_Untreated: 24526
NONO_WT_Untreated: 20846
KIF5A_WT_Untreated: 20974
TOMM20_WT_Untreated: 21633
DAPI_WT_Untreated: 271498
DCP1A_WT_Untreated: 25887
LSM14A_WT_Untreated: 25124
ANXA11_WT_Untreated: 19584
PSD95_WT_Untreated: 21489
NCL_WT_Untreated: 25750
TDP43_WT_Untreated: 19716
Phalloidin_WT_Untreated: 22312
SQSTM1_WT_Untreated: 19138
SNCA_WT_Untreated: 11037
TIA1_WT_Untreated: 16541
GM130_WT_Untreated: 25126
Calreticulin_WT_Untreated: 25894
PEX14_WT_Untreated: 20145
PML_WT_Untreated: 19937
G3BP1_WT_Untreated: 9447
Tubulin_WT_Untreated: 24440
FUS_WT_Untreated: 19636
CLTC_WT_Untreated: 24225
NEMO_WT_Untreated: 24282
SON_WT_Untreated: 22340
PURA_WT_Untreated: 8626
mitotracker_WT_Untreated: 21876
LAMP1_WT_Untreated: 9717
FMRP_WT_Untreated: 8704
2025-08-19 12:00:32 INFO: [load_embeddings] multiplex=False
2025-08-19 12:00:32 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:00:32 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 12:00:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
              precision    recall  f1-score   support

           0       0.99      0.98      0.98      4777
           1       0.95      0.96      0.96      5749
           2       0.97      0.98      0.97      6639
           3       1.00      1.00      1.00     63181
           4       0.96      0.95      0.96      5177
           5       0.97      0.97      0.97      2678
           6       0.99      0.98      0.98      6586
           7       0.98      0.92      0.95      2689
           8       0.98      0.99      0.99      6376
           9       0.98      0.99      0.98      6023
          10       0.95      0.97      0.96      4799
          11       0.97      0.89      0.93      1561
          12       0.97      0.99      0.98      6008
          13       1.00      0.99      0.99      6573
          14       0.99      0.99      0.99      5115
          15       0.90      0.98      0.93      4542
          16       0.99      0.99      0.99      4830
          17       0.99      1.00      1.00      4895
          18       0.75      0.97      0.84      5739
          19       0.96      0.91      0.93      2650
          20       0.96      0.97      0.96      5431
          21       0.97      0.94      0.95      2368
          22       0.99      1.00      0.99      4987
          23       0.79      0.48      0.60      4527
          24       0.97      0.90      0.94      4683
          25       0.99      0.99      0.99      3899
          26       0.96      0.94      0.95      4400
          27       0.66      0.91      0.76      4873
          28       0.91      0.59      0.72      4364

    accuracy                           0.96    196119
   macro avg       0.95      0.93      0.94    196119
weighted avg       0.96      0.96      0.96    196119

2025-08-19 12:00:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:00:39 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:00:41 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:00:41 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 12:00:41 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 12:00:41 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:00:41 INFO: [load_embeddings] paths shape: (196119,)
2025-08-19 12:00:41 INFO: [load_embeddings] multiplex=False
2025-08-19 12:00:41 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:00:41 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 12:00:41 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:00:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:00:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:00:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:00:48 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 12:00:48 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 12:00:48 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 12:00:48 INFO: [load_embeddings] paths shape: (134336,)
2025-08-19 12:00:48 INFO: [load_embeddings] multiplex=False
2025-08-19 12:00:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:00:48 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 12:00:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:00:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:00:56 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:00:57 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:00:57 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 12:00:57 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 12:00:57 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:00:57 INFO: [load_embeddings] paths shape: (189079,)
2025-08-19 12:00:58 INFO: [load_embeddings] multiplex=False
2025-08-19 12:00:58 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:00:58 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 12:00:58 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:01:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:01:06 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:01:07 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:01:08 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 12:01:08 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 12:01:08 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 12:01:08 INFO: [load_embeddings] paths shape: (169304,)
2025-08-19 12:01:08 INFO: [load_embeddings] multiplex=False
2025-08-19 12:01:08 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:01:08 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 12:01:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:01:14 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:01:16 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:01:17 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:01:18 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 12:01:18 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 12:01:18 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 12:01:18 INFO: [load_embeddings] paths shape: (196652,)
2025-08-19 12:01:18 INFO: [load_embeddings] multiplex=False
2025-08-19 12:01:18 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:01:18 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 12:01:18 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:01:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:01:24 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:01:25 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:01:25 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 12:01:25 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 12:01:25 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 12:01:25 INFO: [load_embeddings] paths shape: (141079,)
=== Batch 2 ===
Train: (885490, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (141079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 289648
PEX14_WT_Untreated: 21109
Calreticulin_WT_Untreated: 29143
TDP43_WT_Untreated: 20703
TIA1_WT_Untreated: 16825
TOMM20_WT_Untreated: 22332
HNRNPA1_WT_Untreated: 26333
KIF5A_WT_Untreated: 21917
LSM14A_WT_Untreated: 26961
SON_WT_Untreated: 22776
CLTC_WT_Untreated: 25663
GM130_WT_Untreated: 28274
NEMO_WT_Untreated: 25445
Tubulin_WT_Untreated: 25416
PURA_WT_Untreated: 10490
FMRP_WT_Untreated: 10444
DCP1A_WT_Untreated: 27007
NONO_WT_Untreated: 20846
PML_WT_Untreated: 21075
mitotracker_WT_Untreated: 22238
SQSTM1_WT_Untreated: 20119
ANXA11_WT_Untreated: 20574
Phalloidin_WT_Untreated: 23886
G3BP1_WT_Untreated: 11183
PSD95_WT_Untreated: 22936
FUS_WT_Untreated: 22609
SNCA_WT_Untreated: 10936
NCL_WT_Untreated: 28699
LAMP1_WT_Untreated: 9903
2025-08-19 12:01:33 INFO: [load_embeddings] multiplex=False
2025-08-19 12:01:33 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:01:33 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 12:01:33 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      3787
           1       0.95      0.95      0.95      4311
           2       0.89      0.88      0.88      3390
           3       1.00      1.00      1.00     45031
           4       0.97      0.93      0.95      4057
           5       0.94      0.97      0.95       938
           6       0.99      0.91      0.95      3613
           7       0.91      0.96      0.94       953
           8       0.98      0.97      0.97      3228
           9       0.93      0.99      0.96      4216
          10       0.96      0.97      0.96      3856
          11       0.95      0.91      0.93      1375
          12       0.94      0.98      0.96      4171
          13       0.99      0.98      0.98      3624
          14       0.99      0.99      0.99      3952
          15       0.89      0.91      0.90      4542
          16       0.99      0.99      0.99      3866
          17       0.99      1.00      1.00      3757
          18       0.96      0.79      0.87      4292
          19       0.91      0.93      0.92       786
          20       0.94      0.88      0.91      3857
          21       0.96      0.92      0.94      2469
          22       0.99      1.00      0.99      4551
          23       0.65      0.83      0.73      3546
          24       0.89      0.86      0.88      3696
          25       1.00      0.98      0.99      3615
          26       0.93      0.94      0.93      3701
          27       0.88      0.66      0.75      3897
          28       0.79      0.98      0.87      4002

    accuracy                           0.95    141079
   macro avg       0.93      0.93      0.93    141079
weighted avg       0.95      0.95      0.95    141079

2025-08-19 12:01:38 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:01:40 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:01:41 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:01:42 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 12:01:42 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 12:01:42 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:01:42 INFO: [load_embeddings] paths shape: (196119,)
2025-08-19 12:01:42 INFO: [load_embeddings] multiplex=False
2025-08-19 12:01:42 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:01:42 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 12:01:42 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:01:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:01:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:01:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:01:48 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 12:01:48 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 12:01:48 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 12:01:48 INFO: [load_embeddings] paths shape: (141079,)
2025-08-19 12:01:48 INFO: [load_embeddings] multiplex=False
2025-08-19 12:01:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:01:49 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 12:01:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:01:53 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:01:55 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:01:57 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:01:57 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 12:01:57 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 12:01:57 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:01:57 INFO: [load_embeddings] paths shape: (189079,)
2025-08-19 12:01:57 INFO: [load_embeddings] multiplex=False
2025-08-19 12:01:57 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:01:57 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 12:01:57 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:02:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:02:05 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:02:07 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:02:07 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 12:02:07 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 12:02:07 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 12:02:07 INFO: [load_embeddings] paths shape: (169304,)
2025-08-19 12:02:07 INFO: [load_embeddings] multiplex=False
2025-08-19 12:02:07 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:02:07 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 12:02:07 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:02:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:02:15 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:02:16 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:02:16 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 12:02:17 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 12:02:17 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 12:02:17 INFO: [load_embeddings] paths shape: (196652,)
2025-08-19 12:02:17 INFO: [load_embeddings] multiplex=False
2025-08-19 12:02:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:02:17 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 12:02:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:02:21 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:02:22 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:02:23 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:02:24 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 12:02:24 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 12:02:24 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 12:02:24 INFO: [load_embeddings] paths shape: (134336,)
=== Batch 3 ===
Train: (892233, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (134336, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 291501
PEX14_WT_Untreated: 21028
Calreticulin_WT_Untreated: 28546
TDP43_WT_Untreated: 20828
TIA1_WT_Untreated: 16892
TOMM20_WT_Untreated: 22202
HNRNPA1_WT_Untreated: 26716
KIF5A_WT_Untreated: 22810
LSM14A_WT_Untreated: 27335
SON_WT_Untreated: 23673
CLTC_WT_Untreated: 26550
GM130_WT_Untreated: 27676
NEMO_WT_Untreated: 25953
Tubulin_WT_Untreated: 26110
PURA_WT_Untreated: 9719
FMRP_WT_Untreated: 9628
DCP1A_WT_Untreated: 27588
NONO_WT_Untreated: 21731
PML_WT_Untreated: 21828
mitotracker_WT_Untreated: 22251
SQSTM1_WT_Untreated: 20505
ANXA11_WT_Untreated: 20494
Phalloidin_WT_Untreated: 24168
G3BP1_WT_Untreated: 10290
PSD95_WT_Untreated: 23714
FUS_WT_Untreated: 22519
SNCA_WT_Untreated: 10991
NCL_WT_Untreated: 28627
LAMP1_WT_Untreated: 10360
2025-08-19 12:02:32 INFO: [load_embeddings] multiplex=False
2025-08-19 12:02:32 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:02:32 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 12:02:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      3867
           1       0.87      0.95      0.91      3424
           2       0.98      0.88      0.93      3987
           3       1.00      1.00      1.00     43178
           4       0.93      0.90      0.91      3476
           5       0.91      0.94      0.92      1754
           6       0.99      0.98      0.99      3703
           7       0.94      0.87      0.90      1846
           8       0.98      0.96      0.97      3826
           9       0.99      0.99      0.99      3833
          10       0.97      0.92      0.94      2963
          11       0.94      0.93      0.93       918
          12       0.94      0.97      0.95      3797
          13       0.99      0.99      0.99      3696
          14       0.97      0.98      0.98      3444
          15       0.96      0.76      0.85      3657
          16       0.97      0.99      0.98      3947
          17       0.99      0.99      0.99      3004
          18       0.95      0.94      0.94      3514
          19       0.90      0.89      0.90      1557
          20       0.77      0.88      0.83      3575
          21       0.92      0.95      0.93      2414
          22       0.99      1.00      0.99      3654
          23       0.66      0.59      0.62      3160
          24       0.80      0.97      0.87      3571
          25       0.98      0.99      0.98      3548
          26       0.79      0.97      0.87      3831
          27       0.80      0.67      0.73      3203
          28       0.87      0.77      0.82      3989

    accuracy                           0.94    134336
   macro avg       0.92      0.92      0.92    134336
weighted avg       0.94      0.94      0.94    134336

2025-08-19 12:02:38 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:02:40 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:02:41 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:02:42 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 12:02:42 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 12:02:42 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:02:42 INFO: [load_embeddings] paths shape: (196119,)
2025-08-19 12:02:42 INFO: [load_embeddings] multiplex=False
2025-08-19 12:02:42 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:02:42 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 12:02:42 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:02:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:02:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:02:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:02:49 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 12:02:49 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 12:02:49 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 12:02:49 INFO: [load_embeddings] paths shape: (141079,)
2025-08-19 12:02:49 INFO: [load_embeddings] multiplex=False
2025-08-19 12:02:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:02:49 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 12:02:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:02:53 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:02:54 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:02:55 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:02:56 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 12:02:56 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 12:02:56 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 12:02:56 INFO: [load_embeddings] paths shape: (134336,)
2025-08-19 12:02:56 INFO: [load_embeddings] multiplex=False
2025-08-19 12:02:56 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:02:56 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 12:02:56 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:03:02 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:03:04 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:03:06 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:03:06 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 12:03:06 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 12:03:06 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 12:03:06 INFO: [load_embeddings] paths shape: (169304,)
2025-08-19 12:03:06 INFO: [load_embeddings] multiplex=False
2025-08-19 12:03:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:03:06 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 12:03:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:03:12 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:03:14 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:03:16 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:03:16 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 12:03:16 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 12:03:16 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 12:03:16 INFO: [load_embeddings] paths shape: (196652,)
2025-08-19 12:03:16 INFO: [load_embeddings] multiplex=False
2025-08-19 12:03:16 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:03:16 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 12:03:16 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:03:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:03:23 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:03:25 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:03:25 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 12:03:25 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 12:03:25 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:03:25 INFO: [load_embeddings] paths shape: (189079,)
=== Batch 7 ===
Train: (837490, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (189079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 270193
PEX14_WT_Untreated: 20964
Calreticulin_WT_Untreated: 26534
TDP43_WT_Untreated: 19874
TIA1_WT_Untreated: 17201
TOMM20_WT_Untreated: 21012
HNRNPA1_WT_Untreated: 25487
KIF5A_WT_Untreated: 20280
LSM14A_WT_Untreated: 25505
SON_WT_Untreated: 21753
CLTC_WT_Untreated: 23804
GM130_WT_Untreated: 25432
NEMO_WT_Untreated: 25027
Tubulin_WT_Untreated: 22723
PURA_WT_Untreated: 9113
FMRP_WT_Untreated: 9656
DCP1A_WT_Untreated: 25278
NONO_WT_Untreated: 20764
PML_WT_Untreated: 19880
mitotracker_WT_Untreated: 21332
SQSTM1_WT_Untreated: 18580
ANXA11_WT_Untreated: 19572
Phalloidin_WT_Untreated: 22211
G3BP1_WT_Untreated: 10348
PSD95_WT_Untreated: 22164
FUS_WT_Untreated: 26185
SNCA_WT_Untreated: 11279
NCL_WT_Untreated: 26158
LAMP1_WT_Untreated: 9181
2025-08-19 12:03:43 INFO: [load_embeddings] multiplex=False
2025-08-19 12:03:43 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:03:43 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 12:03:43 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
              precision    recall  f1-score   support

           0       0.98      0.96      0.97      4789
           1       0.70      0.95      0.80      6170
           2       0.80      0.75      0.77      5999
           3       1.00      1.00      1.00     64486
           4       0.96      0.82      0.88      5786
           5       0.95      0.94      0.95      1726
           6       0.00      0.00      0.00        37
           7       0.92      0.97      0.94      1788
           8       0.46      0.55      0.50      6070
           9       0.98      0.05      0.10      5062
          10       0.97      0.96      0.96      5493
          11       0.63      0.97      0.76      2097
          12       0.97      0.61      0.75      5627
          13       0.96      0.09      0.17      6165
          14       0.93      0.98      0.95      4370
          15       0.95      0.92      0.94      4624
          16       0.98      0.98      0.98      4011
          17       0.66      1.00      0.79      4952
          18       0.90      0.97      0.93      5064
          19       0.90      0.95      0.92      2163
          20       0.94      0.95      0.95      5532
          21       0.67      0.89      0.77      2126
          22       0.94      1.00      0.97      5574
          23       0.57      0.62      0.59      5085
          24       0.92      0.88      0.90      4525
          25       0.94      0.97      0.96      3239
          26       0.98      0.93      0.95      5021
          27       0.73      0.82      0.77      6590
          28       0.94      0.78      0.86      4908

    accuracy                           0.87    189079
   macro avg       0.84      0.80      0.79    189079
weighted avg       0.91      0.87      0.86    189079

2025-08-19 12:03:49 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:03:51 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:03:53 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:03:54 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 12:03:54 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 12:03:54 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:03:54 INFO: [load_embeddings] paths shape: (196119,)
2025-08-19 12:03:54 INFO: [load_embeddings] multiplex=False
2025-08-19 12:03:54 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:03:54 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 12:03:54 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:03:58 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:03:59 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:04:00 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:04:01 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 12:04:01 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 12:04:01 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 12:04:01 INFO: [load_embeddings] paths shape: (141079,)
2025-08-19 12:04:01 INFO: [load_embeddings] multiplex=False
2025-08-19 12:04:01 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:04:01 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 12:04:01 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:04:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:04:07 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:04:08 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:04:09 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 12:04:09 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 12:04:09 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 12:04:09 INFO: [load_embeddings] paths shape: (134336,)
2025-08-19 12:04:09 INFO: [load_embeddings] multiplex=False
2025-08-19 12:04:09 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:04:09 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 12:04:09 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:04:14 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:04:17 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:04:18 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:04:19 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 12:04:19 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 12:04:19 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:04:19 INFO: [load_embeddings] paths shape: (189079,)
2025-08-19 12:04:19 INFO: [load_embeddings] multiplex=False
2025-08-19 12:04:19 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:04:19 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 12:04:19 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:04:25 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:04:27 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:04:29 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:04:30 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 12:04:30 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 12:04:30 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 12:04:30 INFO: [load_embeddings] paths shape: (196652,)
2025-08-19 12:04:30 INFO: [load_embeddings] multiplex=False
2025-08-19 12:04:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:04:30 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 12:04:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:04:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:04:39 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:04:41 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:04:41 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 12:04:41 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 12:04:41 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 12:04:41 INFO: [load_embeddings] paths shape: (169304,)
=== Batch 8 ===
Train: (857265, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (169304, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 278916
PEX14_WT_Untreated: 20900
Calreticulin_WT_Untreated: 26659
TDP43_WT_Untreated: 20633
TIA1_WT_Untreated: 17450
TOMM20_WT_Untreated: 22224
HNRNPA1_WT_Untreated: 25041
KIF5A_WT_Untreated: 22126
LSM14A_WT_Untreated: 25648
SON_WT_Untreated: 22072
CLTC_WT_Untreated: 26025
GM130_WT_Untreated: 25865
NEMO_WT_Untreated: 23656
Tubulin_WT_Untreated: 24531
PURA_WT_Untreated: 9460
FMRP_WT_Untreated: 9449
DCP1A_WT_Untreated: 25323
NONO_WT_Untreated: 20689
PML_WT_Untreated: 21457
mitotracker_WT_Untreated: 22540
SQSTM1_WT_Untreated: 20396
ANXA11_WT_Untreated: 20291
Phalloidin_WT_Untreated: 23674
G3BP1_WT_Untreated: 9971
PSD95_WT_Untreated: 24965
FUS_WT_Untreated: 20373
SNCA_WT_Untreated: 11895
NCL_WT_Untreated: 26475
LAMP1_WT_Untreated: 8561
2025-08-19 12:05:03 INFO: [load_embeddings] multiplex=False
2025-08-19 12:05:03 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:05:03 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 12:05:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
              precision    recall  f1-score   support

           0       0.96      0.97      0.97      4070
           1       0.94      0.73      0.82      3949
           2       0.94      0.90      0.92      5874
           3       1.00      1.00      1.00     55763
           4       0.96      0.97      0.97      5741
           5       0.85      0.97      0.90      1933
           6       0.97      0.56      0.71      5849
           7       0.93      0.96      0.95      2165
           8       0.98      0.69      0.81      5637
           9       0.67      0.99      0.80      5508
          10       0.94      0.97      0.96      3647
          11       0.97      0.91      0.94      2717
          12       0.94      0.97      0.96      5484
          13       0.93      0.98      0.95      5848
          14       0.99      1.00      0.99      5741
          15       0.93      0.94      0.94      4699
          16       0.89      0.98      0.93      4075
          17       0.93      0.99      0.96      3375
          18       0.86      0.95      0.90      2263
          19       0.89      0.96      0.92      1816
          20       0.90      0.93      0.91      4069
          21       0.90      0.94      0.92      1510
          22       0.95      1.00      0.97      5255
          23       0.62      0.66      0.64      3269
          24       0.93      0.90      0.92      3766
          25       0.98      0.95      0.97      2990
          26       0.85      0.94      0.89      3809
          27       0.79      0.72      0.75      4782
          28       0.85      0.89      0.87      3700

    accuracy                           0.93    169304
   macro avg       0.91      0.91      0.90    169304
weighted avg       0.94      0.93      0.93    169304

2025-08-19 12:05:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:05:10 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:05:12 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:05:13 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 12:05:13 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 12:05:13 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:05:13 INFO: [load_embeddings] paths shape: (196119,)
2025-08-19 12:05:13 INFO: [load_embeddings] multiplex=False
2025-08-19 12:05:13 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:05:13 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 12:05:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:05:17 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:05:18 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:05:20 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:05:20 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 12:05:20 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 12:05:20 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 12:05:20 INFO: [load_embeddings] paths shape: (141079,)
2025-08-19 12:05:20 INFO: [load_embeddings] multiplex=False
2025-08-19 12:05:20 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:05:20 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 12:05:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:05:24 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:05:26 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:05:27 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:05:27 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 12:05:27 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 12:05:27 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 12:05:27 INFO: [load_embeddings] paths shape: (134336,)
2025-08-19 12:05:28 INFO: [load_embeddings] multiplex=False
2025-08-19 12:05:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:05:28 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 12:05:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:05:33 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:05:35 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:05:37 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:05:37 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 12:05:37 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 12:05:37 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 12:05:37 INFO: [load_embeddings] paths shape: (189079,)
2025-08-19 12:05:37 INFO: [load_embeddings] multiplex=False
2025-08-19 12:05:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:05:37 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 12:05:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:05:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:05:46 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:05:47 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:05:48 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 12:05:48 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 12:05:48 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 12:05:48 INFO: [load_embeddings] paths shape: (169304,)
2025-08-19 12:05:49 INFO: [load_embeddings] multiplex=False
2025-08-19 12:05:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 12:05:49 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 12:05:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 12:05:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 12:05:57 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 12:05:58 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 12:05:59 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 12:05:59 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 12:05:59 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 12:05:59 INFO: [load_embeddings] paths shape: (196652,)
=== Batch 9 ===
Train: (829917, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (196652, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 271639
PEX14_WT_Untreated: 20729
Calreticulin_WT_Untreated: 25889
TDP43_WT_Untreated: 20241
TIA1_WT_Untreated: 17291
TOMM20_WT_Untreated: 20762
HNRNPA1_WT_Untreated: 24642
KIF5A_WT_Untreated: 20758
LSM14A_WT_Untreated: 25087
SON_WT_Untreated: 24021
CLTC_WT_Untreated: 23603
GM130_WT_Untreated: 25137
NEMO_WT_Untreated: 22622
Tubulin_WT_Untreated: 23345
PURA_WT_Untreated: 8972
FMRP_WT_Untreated: 9029
DCP1A_WT_Untreated: 24237
NONO_WT_Untreated: 22064
PML_WT_Untreated: 19983
mitotracker_WT_Untreated: 20963
SQSTM1_WT_Untreated: 19587
ANXA11_WT_Untreated: 21290
Phalloidin_WT_Untreated: 22464
G3BP1_WT_Untreated: 9441
PSD95_WT_Untreated: 20872
FUS_WT_Untreated: 19788
SNCA_WT_Untreated: 10887
NCL_WT_Untreated: 25906
LAMP1_WT_Untreated: 8668
              precision    recall  f1-score   support

           0       0.99      0.68      0.80      3071
           1       0.98      0.92      0.95      6371
           2       0.87      0.97      0.92      6644
           3       1.00      1.00      1.00     63040
           4       0.95      0.98      0.96      6827
           5       0.98      0.99      0.98      2353
           6       0.84      0.87      0.86      6434
           7       0.92      0.99      0.95      2695
           8       0.98      0.99      0.99      6365
           9       0.85      0.82      0.84      5907
          10       0.98      0.97      0.97      5015
          11       0.95      0.98      0.96      2610
          12       0.98      0.96      0.97      6045
          13       1.00      0.98      0.99      6417
          14       0.98      1.00      0.99      6775
          15       0.86      0.85      0.86      3324
          16       1.00      0.95      0.97      4246
          17       0.99      1.00      0.99      4849
          18       0.95      0.93      0.94      6356
          19       0.98      0.88      0.93      2304
          20       0.92      0.96      0.94      5279
          21       0.75      0.87      0.80      2518
          22       1.00      1.00      1.00      3306
          23       0.69      0.43      0.53      4078
          24       0.88      0.89      0.89      4158
          25       0.92      0.99      0.95      3149
          26       0.98      0.94      0.96      5271
          27       0.69      0.87      0.77      5968
          28       0.90      0.89      0.89      5277

    accuracy                           0.94    196652
   macro avg       0.92      0.91      0.92    196652
weighted avg       0.95      0.94      0.94    196652


=== Overall Accuracy ===
0.9325490639810191 [0.9588260188966903, 0.951268438250909, 0.9434775488327776, 0.8660612759745926, 0.931165241222889, 0.9444958607082562]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.997883     0.939535     0.999302 0.970324 0.998531
        CLTC_WT_Untreated  0.993837     0.915727     0.996186 0.878364 0.997462
Calreticulin_WT_Untreated  0.993824     0.899825     0.996901 0.904775 0.996722
        DAPI_WT_Untreated  0.999661     0.999964     0.999514 0.998997 0.999983
       DCP1A_WT_Untreated  0.996571     0.927730     0.998719 0.957633 0.997747
        FMRP_WT_Untreated  0.998845     0.962221     0.999255 0.935429 0.999576
         FUS_WT_Untreated  0.990330     0.848219     0.994055 0.789031 0.996014
       G3BP1_WT_Untreated  0.998591     0.946605     0.999213 0.935048 0.999361
       GM130_WT_Untreated  0.990985     0.848232     0.995504 0.856580 0.995197
     HNRNPA1_WT_Untreated  0.990137     0.799699     0.995978 0.859122 0.993870
       KIF5A_WT_Untreated  0.998070     0.962713     0.998981 0.960514 0.999040
       LAMP1_WT_Untreated  0.997765     0.936070     0.998451 0.870322 0.999289
      LSM14A_WT_Untreated  0.996061     0.908583     0.998797 0.959368 0.997146
         NCL_WT_Untreated  0.993606     0.815364     0.999401 0.977886 0.994030
        NEMO_WT_Untreated  0.999026     0.989761     0.999299 0.976540 0.999698
        NONO_WT_Untreated  0.995432     0.900110     0.997850 0.913897 0.997468
       PEX14_WT_Untreated  0.998721     0.980821     0.999167 0.967075 0.999522
         PML_WT_Untreated  0.997045     0.995933     0.997072 0.893978 0.999899
       PSD95_WT_Untreated  0.994746     0.924379     0.996663 0.882999 0.997937
        PURA_WT_Untreated  0.998321     0.919475     0.999196 0.927039 0.999106
  Phalloidin_WT_Untreated  0.995730     0.933821     0.997450 0.910487 0.998161
        SNCA_WT_Untreated  0.996782     0.915479     0.997857 0.849685 0.998881
         SON_WT_Untreated  0.999166     0.996524     0.999238 0.972815 0.999905
      SQSTM1_WT_Untreated  0.983160     0.592267     0.992384 0.647271 0.990398
       TDP43_WT_Untreated  0.995215     0.900119     0.997530 0.898719 0.997568
        TIA1_WT_Untreated  0.998936     0.979012     0.999341 0.967931 0.999574
      TOMM20_WT_Untreated  0.996368     0.943841     0.997734 0.915530 0.998538
     Tubulin_WT_Untreated  0.985713     0.789104     0.991492 0.731623 0.993787
 mitotracker_WT_Untreated  0.992246     0.814748     0.996902 0.873397 0.995149
            Macro Average  0.995268     0.906410     0.997567 0.902841 0.997571
In [3]:
## Baseline
run_baseline_model(
    dataset_config= dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
    train_specific_batches = [1],
    results_csv = 'classification_results-indi.csv'
)
Loading all batches...
2025-08-20 18:05:08 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/logs/200825_180507_949809_64708_galavir_sysdashboardsysjupyter.log; JOBID: 64708 Username: galavir) JOBNAME: sysdashboardsysjupyter
2025-08-20 18:05:08 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input
2025-08-20 18:05:08 INFO: [load_embeddings] multiplex=False
2025-08-20 18:05:08 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:05:08 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 18:05:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:05:18 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:05:20 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:05:22 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:05:22 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-20 18:05:22 INFO: [load_embeddings] labels shape: (196119,)
2025-08-20 18:05:22 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-20 18:05:22 INFO: [load_embeddings] paths shape: (196119,)
2025-08-20 18:05:22 INFO: [load_embeddings] multiplex=False
2025-08-20 18:05:22 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:05:22 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 18:05:22 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:05:27 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:05:28 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:05:29 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:05:30 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-20 18:05:30 INFO: [load_embeddings] labels shape: (141079,)
2025-08-20 18:05:30 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-20 18:05:30 INFO: [load_embeddings] paths shape: (141079,)
2025-08-20 18:05:30 INFO: [load_embeddings] multiplex=False
2025-08-20 18:05:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:05:30 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 18:05:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:05:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:05:37 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:05:38 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:05:38 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-20 18:05:39 INFO: [load_embeddings] labels shape: (134336,)
2025-08-20 18:05:39 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-20 18:05:39 INFO: [load_embeddings] paths shape: (134336,)
2025-08-20 18:05:39 INFO: [load_embeddings] multiplex=False
2025-08-20 18:05:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:05:39 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 18:05:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:05:49 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:05:51 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:05:52 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:05:53 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-20 18:05:53 INFO: [load_embeddings] labels shape: (189079,)
2025-08-20 18:05:53 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-20 18:05:53 INFO: [load_embeddings] paths shape: (189079,)
2025-08-20 18:05:53 INFO: [load_embeddings] multiplex=False
2025-08-20 18:05:53 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:05:53 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 18:05:53 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:06:02 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:06:04 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:06:05 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:06:06 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-20 18:06:06 INFO: [load_embeddings] labels shape: (169304,)
2025-08-20 18:06:06 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-20 18:06:06 INFO: [load_embeddings] paths shape: (169304,)
2025-08-20 18:06:06 INFO: [load_embeddings] multiplex=False
2025-08-20 18:06:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:06:06 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 18:06:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:06:15 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:06:17 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:06:18 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:06:19 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-20 18:06:19 INFO: [load_embeddings] labels shape: (196652,)
2025-08-20 18:06:19 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-20 18:06:19 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (141079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.96      0.99      0.98      3787
           1       0.92      0.94      0.93      4311
           2       0.89      0.87      0.88      3390
           3       0.99      1.00      1.00     45031
           4       0.97      0.93      0.95      4057
           5       0.87      0.98      0.92       938
           6       0.98      0.82      0.89      3613
           7       0.81      0.97      0.88       953
           8       0.96      0.97      0.97      3228
           9       0.91      0.99      0.95      4216
          10       0.94      0.96      0.95      3856
          11       0.90      0.86      0.88      1375
          12       0.95      0.96      0.95      4171
          13       0.99      0.98      0.99      3624
          14       0.99      0.99      0.99      3952
          15       0.95      0.89      0.92      4542
          16       0.98      0.99      0.99      3866
          17       0.99      1.00      0.99      3757
          18       0.94      0.68      0.79      4292
          19       0.84      0.92      0.88       786
          20       0.91      0.88      0.89      3857
          21       0.96      0.94      0.95      2469
          22       0.99      1.00      1.00      4551
          23       0.55      0.78      0.64      3546
          24       0.88      0.94      0.91      3696
          25       0.99      0.98      0.98      3615
          26       0.86      0.93      0.89      3701
          27       0.90      0.42      0.57      3897
          28       0.73      0.94      0.83      4002

    accuracy                           0.94    141079
   macro avg       0.91      0.91      0.91    141079
weighted avg       0.94      0.94      0.93    141079

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (134336, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      3867
           1       0.83      0.91      0.87      3424
           2       0.97      0.85      0.91      3987
           3       1.00      1.00      1.00     43178
           4       0.91      0.88      0.90      3476
           5       0.87      0.96      0.91      1754
           6       0.99      0.95      0.97      3703
           7       0.80      0.91      0.85      1846
           8       0.97      0.91      0.94      3826
           9       0.98      0.99      0.99      3833
          10       0.94      0.92      0.93      2963
          11       0.84      0.94      0.89       918
          12       0.94      0.96      0.95      3797
          13       0.99      0.99      0.99      3696
          14       0.94      0.99      0.97      3444
          15       0.97      0.86      0.91      3657
          16       0.97      0.99      0.98      3947
          17       0.99      0.99      0.99      3004
          18       0.91      0.87      0.89      3514
          19       0.87      0.91      0.89      1557
          20       0.74      0.85      0.79      3575
          21       0.93      0.94      0.94      2414
          22       0.99      1.00      0.99      3654
          23       0.55      0.60      0.57      3160
          24       0.89      0.97      0.93      3571
          25       0.97      0.99      0.98      3548
          26       0.69      0.95      0.80      3831
          27       0.80      0.46      0.58      3203
          28       0.78      0.55      0.64      3989

    accuracy                           0.93    134336
   macro avg       0.90      0.90      0.89    134336
weighted avg       0.93      0.93      0.93    134336

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (189079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.98      0.83      0.90      4789
           1       0.68      0.89      0.77      6170
           2       0.57      0.85      0.68      5999
           3       0.99      1.00      1.00     64486
           4       0.92      0.77      0.84      5786
           5       0.92      0.92      0.92      1726
           6       0.00      0.00      0.00        37
           7       0.76      0.96      0.85      1788
           8       0.33      0.21      0.26      6070
           9       0.94      0.01      0.03      5062
          10       0.85      0.96      0.90      5493
          11       0.28      0.81      0.41      2097
          12       0.88      0.36      0.51      5627
          13       0.47      0.01      0.01      6165
          14       0.88      0.98      0.93      4370
          15       0.89      0.71      0.79      4624
          16       0.94      0.94      0.94      4011
          17       0.45      1.00      0.62      4952
          18       0.86      0.95      0.90      5064
          19       0.75      0.91      0.82      2163
          20       0.89      0.95      0.92      5532
          21       0.86      0.64      0.74      2126
          22       0.99      0.98      0.99      5574
          23       0.58      0.68      0.63      5085
          24       0.73      0.78      0.75      4525
          25       0.82      0.90      0.86      3239
          26       0.98      0.82      0.89      5021
          27       0.75      0.72      0.73      6590
          28       0.78      0.19      0.31      4908

    accuracy                           0.80    189079
   macro avg       0.75      0.71      0.69    189079
weighted avg       0.84      0.80      0.79    189079

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (169304, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.97      0.93      0.95      4070
           1       0.90      0.64      0.75      3949
           2       0.85      0.97      0.90      5874
           3       0.99      1.00      0.99     55763
           4       0.95      0.96      0.95      5741
           5       0.80      0.96      0.87      1933
           6       0.93      0.77      0.84      5849
           7       0.78      0.96      0.86      2165
           8       0.95      0.61      0.74      5637
           9       0.85      0.95      0.90      5508
          10       0.80      0.97      0.87      3647
          11       0.93      0.72      0.82      2717
          12       0.90      0.94      0.92      5484
          13       0.90      0.97      0.93      5848
          14       0.97      1.00      0.98      5741
          15       0.88      0.75      0.81      4699
          16       0.80      0.95      0.87      4075
          17       0.90      0.99      0.94      3375
          18       0.76      0.91      0.83      2263
          19       0.75      0.92      0.83      1816
          20       0.83      0.93      0.88      4069
          21       0.88      0.90      0.89      1510
          22       0.97      0.99      0.98      5255
          23       0.54      0.67      0.60      3269
          24       0.74      0.86      0.80      3766
          25       0.96      0.92      0.94      2990
          26       0.86      0.84      0.85      3809
          27       0.78      0.58      0.67      4782
          28       0.91      0.54      0.68      3700

    accuracy                           0.91    169304
   macro avg       0.86      0.86      0.86    169304
weighted avg       0.91      0.91      0.90    169304

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (196652, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.96      0.38      0.54      3071
           1       0.93      0.92      0.92      6371
           2       0.62      0.98      0.76      6644
           3       1.00      1.00      1.00     63040
           4       0.91      0.96      0.93      6827
           5       0.97      0.98      0.97      2353
           6       0.85      0.99      0.92      6434
           7       0.85      0.98      0.91      2695
           8       0.94      0.99      0.97      6365
           9       0.99      0.80      0.88      5907
          10       0.90      0.97      0.93      5015
          11       0.87      0.76      0.81      2610
          12       0.93      0.93      0.93      6045
          13       0.99      0.97      0.98      6417
          14       0.92      1.00      0.96      6775
          15       0.68      0.62      0.65      3324
          16       0.98      0.89      0.93      4246
          17       0.97      0.99      0.98      4849
          18       0.91      0.89      0.90      6356
          19       0.86      0.82      0.84      2304
          20       0.81      0.96      0.88      5279
          21       0.62      0.63      0.63      2518
          22       0.99      0.99      0.99      3306
          23       0.61      0.52      0.56      4078
          24       0.72      0.75      0.73      4158
          25       0.77      0.98      0.86      3149
          26       0.98      0.81      0.89      5271
          27       0.72      0.75      0.74      5968
          28       0.79      0.29      0.42      5277

    accuracy                           0.90    196652
   macro avg       0.86      0.84      0.84    196652
weighted avg       0.91      0.90      0.90    196652


=== Overall Accuracy ===
0.8952082124805276 [0.9362555731186073, 0.9276441162458313, 0.8037010984826448, 0.9056017577848131, 0.9028385167707422]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.995581     0.841810     0.999295 0.966467 0.996191
        CLTC_WT_Untreated  0.990983     0.868029     0.994678 0.830523 0.996029
Calreticulin_WT_Untreated  0.986117     0.911331     0.988524 0.718772 0.997121
        DAPI_WT_Untreated  0.997806     0.999989     0.996746 0.993345 0.999995
       DCP1A_WT_Untreated  0.994769     0.901456     0.997771 0.928648 0.996832
        FMRP_WT_Untreated  0.998248     0.957835     0.998676 0.884562 0.999553
         FUS_WT_Untreated  0.989688     0.883938     0.992249 0.734160 0.997175
       G3BP1_WT_Untreated  0.996822     0.954165     0.997313 0.803387 0.999471
       GM130_WT_Untreated  0.986956     0.703415     0.995803 0.839460 0.990793
     HNRNPA1_WT_Untreated  0.990408     0.733670     0.998221 0.926189 0.991946
       KIF5A_WT_Untreated  0.995517     0.957137     0.996511 0.876676 0.998887
       LAMP1_WT_Untreated  0.991375     0.793352     0.993719 0.599269 0.997544
      LSM14A_WT_Untreated  0.992354     0.814560     0.997900 0.923678 0.994236
         NCL_WT_Untreated  0.991095     0.741709     0.999075 0.962506 0.991795
        NEMO_WT_Untreated  0.997861     0.990693     0.998077 0.939467 0.999719
        NONO_WT_Untreated  0.991576     0.768013     0.997332 0.881123 0.994046
       PEX14_WT_Untreated  0.997045     0.949417     0.998229 0.930208 0.998742
         PML_WT_Untreated  0.991768     0.993479     0.991726 0.747067 0.999838
       PSD95_WT_Untreated  0.993511     0.863232     0.996971 0.883333 0.996369
        PURA_WT_Untreated  0.996604     0.888824     0.997736 0.804681 0.998832
  Phalloidin_WT_Untreated  0.992933     0.919236     0.994967 0.834520 0.997764
        SNCA_WT_Untreated  0.995421     0.806469     0.997966 0.842260 0.997395
         SON_WT_Untreated  0.999396     0.991047     0.999626 0.986543 0.999752
      SQSTM1_WT_Untreated  0.980460     0.647403     0.988316 0.566555 0.991655
       TDP43_WT_Untreated  0.990992     0.853266     0.994341 0.785718 0.996424
        TIA1_WT_Untreated  0.996888     0.954900     0.997742 0.895764 0.999082
      TOMM20_WT_Untreated  0.992909     0.861739     0.996417 0.865460 0.996302
     Tubulin_WT_Untreated  0.983191     0.616326     0.994315 0.766760 0.988435
 mitotracker_WT_Untreated  0.982767     0.474721     0.996512 0.786445 0.985939
            Macro Average  0.992450     0.849695     0.996095 0.844950 0.996133
Out[3]:
{'Accuracy': 0.9924496274350632,
 'Sensitivity': 0.8496952755838646,
 'Specificity': 0.9960950122368418,
 'PPV': 0.8449497977273821,
 'NPV': 0.9961332704646154}
In [4]:
additional_classifiers = [
    (GaussianNB, {}),
    (RidgeClassifier, {}),
    (LinearSVC, {"C": 1.0, "max_iter": 1000, "random_state": 42}),
    (RandomForestClassifier, {"n_estimators": 300, "random_state": 42}),
    (ExtraTreesClassifier, {
        "max_depth": None,
        "min_samples_leaf": 1,
        "n_estimators": 300,
        "n_jobs": -1,
        "random_state": 42
    }),
]
In [4]:
for clf_class, clf_kwargs in additional_classifiers:
    print(f"\n=== Running {clf_class.__name__} ===")
    run_baseline_model(
        dataset_config=dataset_config,
        batches=[1, 2, 3, 7, 8, 9],
        classifier_class=clf_class,
        classifier_kwargs=clf_kwargs,
        train_specific_batches=[1],
        results_csv="classification_results-indi.csv"
    )
2025-08-20 18:42:40 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/logs/200825_184240_000444_71466_galavir_sysdashboardsysjupyter.log; JOBID: 71466 Username: galavir) JOBNAME: sysdashboardsysjupyter
2025-08-20 18:42:40 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input
2025-08-20 18:42:40 INFO: [load_embeddings] multiplex=False
2025-08-20 18:42:40 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:42:40 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 18:42:40 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
=== Running RandomForestClassifier ===
Loading all batches...
2025-08-20 18:42:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:42:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:42:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:42:48 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-20 18:42:48 INFO: [load_embeddings] labels shape: (196119,)
2025-08-20 18:42:48 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-20 18:42:48 INFO: [load_embeddings] paths shape: (196119,)
2025-08-20 18:42:48 INFO: [load_embeddings] multiplex=False
2025-08-20 18:42:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:42:48 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 18:42:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:42:52 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:42:53 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:42:54 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:42:55 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-20 18:42:55 INFO: [load_embeddings] labels shape: (141079,)
2025-08-20 18:42:55 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-20 18:42:55 INFO: [load_embeddings] paths shape: (141079,)
2025-08-20 18:42:55 INFO: [load_embeddings] multiplex=False
2025-08-20 18:42:55 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:42:55 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 18:42:55 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:43:00 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:43:01 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:43:02 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:43:03 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-20 18:43:03 INFO: [load_embeddings] labels shape: (134336,)
2025-08-20 18:43:03 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-20 18:43:03 INFO: [load_embeddings] paths shape: (134336,)
2025-08-20 18:43:03 INFO: [load_embeddings] multiplex=False
2025-08-20 18:43:03 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:43:03 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 18:43:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:43:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:43:10 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:43:12 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:43:12 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-20 18:43:12 INFO: [load_embeddings] labels shape: (189079,)
2025-08-20 18:43:12 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-20 18:43:12 INFO: [load_embeddings] paths shape: (189079,)
2025-08-20 18:43:12 INFO: [load_embeddings] multiplex=False
2025-08-20 18:43:12 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:43:12 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 18:43:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:43:19 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:43:21 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:43:23 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:43:24 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-20 18:43:24 INFO: [load_embeddings] labels shape: (169304,)
2025-08-20 18:43:24 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-20 18:43:24 INFO: [load_embeddings] paths shape: (169304,)
2025-08-20 18:43:24 INFO: [load_embeddings] multiplex=False
2025-08-20 18:43:24 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 18:43:24 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 18:43:24 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 18:43:30 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 18:43:32 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 18:43:34 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 18:43:34 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-20 18:43:34 INFO: [load_embeddings] labels shape: (196652,)
2025-08-20 18:43:34 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-20 18:43:34 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (141079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.94      0.99      0.97      3787
           1       0.87      0.90      0.88      4311
           2       0.88      0.82      0.85      3390
           3       1.00      1.00      1.00     45031
           4       0.96      0.93      0.94      4057
           5       0.85      0.98      0.91       938
           6       0.98      0.85      0.91      3613
           7       0.80      0.95      0.86       953
           8       0.93      0.96      0.95      3228
           9       0.90      0.99      0.94      4216
          10       0.95      0.94      0.95      3856
          11       0.88      0.75      0.81      1375
          12       0.93      0.97      0.95      4171
          13       1.00      0.97      0.98      3624
          14       0.99      1.00      0.99      3952
          15       0.93      0.86      0.90      4542
          16       0.98      0.99      0.98      3866
          17       0.99      1.00      1.00      3757
          18       0.91      0.57      0.70      4292
          19       0.84      0.91      0.88       786
          20       0.91      0.85      0.87      3857
          21       0.96      0.93      0.94      2469
          22       0.99      1.00      1.00      4551
          23       0.54      0.73      0.62      3546
          24       0.85      0.92      0.88      3696
          25       0.99      0.98      0.98      3615
          26       0.81      0.92      0.86      3701
          27       0.89      0.43      0.58      3897
          28       0.65      0.93      0.76      4002

    accuracy                           0.93    141079
   macro avg       0.90      0.90      0.89    141079
weighted avg       0.93      0.93      0.92    141079

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (134336, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      3867
           1       0.72      0.83      0.77      3424
           2       0.97      0.80      0.88      3987
           3       1.00      1.00      1.00     43178
           4       0.84      0.87      0.85      3476
           5       0.82      0.96      0.88      1754
           6       0.99      0.97      0.98      3703
           7       0.72      0.86      0.78      1846
           8       0.97      0.83      0.89      3826
           9       0.98      0.99      0.99      3833
          10       0.94      0.91      0.93      2963
          11       0.84      0.90      0.87       918
          12       0.93      0.96      0.94      3797
          13       1.00      0.99      0.99      3696
          14       0.94      0.99      0.96      3444
          15       0.97      0.85      0.91      3657
          16       0.95      0.99      0.97      3947
          17       0.99      1.00      0.99      3004
          18       0.89      0.85      0.87      3514
          19       0.83      0.91      0.87      1557
          20       0.72      0.80      0.76      3575
          21       0.93      0.94      0.94      2414
          22       0.99      1.00      0.99      3654
          23       0.51      0.55      0.53      3160
          24       0.89      0.97      0.92      3571
          25       0.98      0.99      0.98      3548
          26       0.71      0.94      0.81      3831
          27       0.79      0.40      0.53      3203
          28       0.68      0.49      0.57      3989

    accuracy                           0.92    134336
   macro avg       0.88      0.88      0.87    134336
weighted avg       0.92      0.92      0.91    134336

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (189079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.94      0.81      0.87      4789
           1       0.82      0.84      0.83      6170
           2       0.59      0.89      0.71      5999
           3       0.99      1.00      1.00     64486
           4       0.76      0.76      0.76      5786
           5       0.94      0.91      0.93      1726
           6       0.00      0.00      0.00        37
           7       0.77      0.95      0.85      1788
           8       0.41      0.40      0.40      6070
           9       0.94      0.01      0.01      5062
          10       0.83      0.96      0.89      5493
          11       0.20      0.53      0.30      2097
          12       0.62      0.12      0.21      5627
          13       0.40      0.00      0.00      6165
          14       0.80      0.99      0.89      4370
          15       0.88      0.75      0.81      4624
          16       0.94      0.93      0.94      4011
          17       0.45      1.00      0.62      4952
          18       0.81      0.93      0.87      5064
          19       0.69      0.92      0.79      2163
          20       0.85      0.93      0.89      5532
          21       0.78      0.68      0.72      2126
          22       1.00      0.99      1.00      5574
          23       0.59      0.62      0.61      5085
          24       0.74      0.69      0.71      4525
          25       0.83      0.93      0.88      3239
          26       0.98      0.73      0.84      5021
          27       0.72      0.70      0.71      6590
          28       0.68      0.18      0.28      4908

    accuracy                           0.79    189079
   macro avg       0.72      0.70      0.67    189079
weighted avg       0.82      0.79      0.78    189079

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (169304, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.97      0.94      0.95      4070
           1       0.83      0.52      0.64      3949
           2       0.85      0.96      0.90      5874
           3       0.99      1.00      1.00     55763
           4       0.91      0.95      0.93      5741
           5       0.79      0.95      0.86      1933
           6       0.94      0.79      0.86      5849
           7       0.75      0.94      0.84      2165
           8       0.85      0.53      0.66      5637
           9       0.82      0.96      0.88      5508
          10       0.80      0.97      0.88      3647
          11       0.90      0.45      0.60      2717
          12       0.81      0.95      0.88      5484
          13       0.99      0.94      0.96      5848
          14       0.96      0.99      0.98      5741
          15       0.87      0.71      0.78      4699
          16       0.71      0.96      0.82      4075
          17       0.88      1.00      0.93      3375
          18       0.68      0.89      0.77      2263
          19       0.74      0.93      0.82      1816
          20       0.82      0.90      0.86      4069
          21       0.83      0.91      0.87      1510
          22       0.98      1.00      0.99      5255
          23       0.52      0.63      0.57      3269
          24       0.71      0.85      0.77      3766
          25       0.96      0.90      0.93      2990
          26       0.86      0.77      0.81      3809
          27       0.77      0.55      0.64      4782
          28       0.81      0.54      0.65      3700

    accuracy                           0.89    169304
   macro avg       0.84      0.84      0.83    169304
weighted avg       0.90      0.89      0.89    169304

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (196652, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.87      0.39      0.54      3071
           1       0.91      0.88      0.90      6371
           2       0.63      0.98      0.77      6644
           3       1.00      1.00      1.00     63040
           4       0.88      0.96      0.91      6827
           5       0.95      0.98      0.96      2353
           6       0.83      0.99      0.91      6434
           7       0.86      0.95      0.90      2695
           8       0.87      0.99      0.93      6365
           9       0.98      0.79      0.88      5907
          10       0.87      0.97      0.91      5015
          11       0.79      0.45      0.57      2610
          12       0.90      0.91      0.90      6045
          13       1.00      0.96      0.98      6417
          14       0.91      0.99      0.95      6775
          15       0.58      0.62      0.60      3324
          16       0.99      0.89      0.93      4246
          17       0.98      0.99      0.99      4849
          18       0.88      0.90      0.89      6356
          19       0.83      0.83      0.83      2304
          20       0.79      0.94      0.86      5279
          21       0.63      0.65      0.64      2518
          22       0.99      1.00      0.99      3306
          23       0.59      0.50      0.54      4078
          24       0.65      0.56      0.60      4158
          25       0.77      0.99      0.86      3149
          26       0.98      0.72      0.83      5271
          27       0.71      0.74      0.72      5968
          28       0.76      0.27      0.40      5277

    accuracy                           0.89    196652
   macro avg       0.84      0.82      0.82    196652
weighted avg       0.89      0.89      0.88    196652


=== Overall Accuracy ===
0.8826791073517963 [0.925481467830081, 0.915004168651739, 0.7934091041310775, 0.8904633086046402, 0.8890374875414437]
2025-08-20 21:19:39 INFO: [load_embeddings] multiplex=False
2025-08-20 21:19:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 21:19:39 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 21:19:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.995018     0.841554     0.998725 0.940965 0.996183
        CLTC_WT_Untreated  0.989851     0.809618     0.995267 0.837125 0.994285
Calreticulin_WT_Untreated  0.986229     0.905654     0.988822 0.722815 0.996939
        DAPI_WT_Untreated  0.998679     0.999996     0.998039 0.995979 0.999998
       DCP1A_WT_Untreated  0.992355     0.896125     0.995451 0.863728 0.996654
        FMRP_WT_Untreated  0.998017     0.953010     0.998493 0.870135 0.999502
         FUS_WT_Untreated  0.989910     0.902271     0.992033 0.732804 0.997620
       G3BP1_WT_Untreated  0.996247     0.931195     0.996995 0.780984 0.999207
       GM130_WT_Untreated  0.985003     0.717942     0.993336 0.770700 0.991219
     HNRNPA1_WT_Untreated  0.989955     0.732447     0.997791 0.909846 0.991906
       KIF5A_WT_Untreated  0.995130     0.953323     0.996214 0.867086 0.998787
       LAMP1_WT_Untreated  0.988650     0.549861     0.993845 0.513997 0.994666
      LSM14A_WT_Untreated  0.989303     0.760707     0.996435 0.869399 0.992564
         NCL_WT_Untreated  0.991507     0.729748     0.999883 0.995023 0.991425
        NEMO_WT_Untreated  0.997105     0.993246     0.997221 0.915016 0.999796
        NONO_WT_Untreated  0.990597     0.763024     0.996456 0.847190 0.993914
       PEX14_WT_Untreated  0.996189     0.950558     0.997323 0.898255 0.998769
         PML_WT_Untreated  0.991894     0.996439     0.991782 0.748897 0.999912
       PSD95_WT_Untreated  0.991537     0.832984     0.995749 0.838840 0.995564
        PURA_WT_Untreated  0.996124     0.893809     0.997198 0.769999 0.998884
  Phalloidin_WT_Untreated  0.991654     0.893510     0.994364 0.814013 0.997052
        SNCA_WT_Untreated  0.995250     0.815348     0.997673 0.825142 0.997513
         SON_WT_Untreated  0.999652     0.995971     0.999754 0.991135 0.999889
      SQSTM1_WT_Untreated  0.979542     0.606960     0.988331 0.550965 0.990706
       TDP43_WT_Untreated  0.989274     0.786620     0.994203 0.767430 0.994808
        TIA1_WT_Untreated  0.997009     0.957741     0.997807 0.898735 0.999140
      TOMM20_WT_Untreated  0.991481     0.806222     0.996436 0.858148 0.994825
     Tubulin_WT_Untreated  0.982305     0.597668     0.993968 0.750270 0.987875
 mitotracker_WT_Untreated  0.980427     0.454791     0.994649 0.696904 0.985387
            Macro Average  0.991583     0.828564     0.995663 0.822122 0.995689

=== Running ExtraTreesClassifier ===
Loading all batches...
2025-08-20 21:19:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 21:19:46 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 21:19:47 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 21:19:48 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-20 21:19:48 INFO: [load_embeddings] labels shape: (196119,)
2025-08-20 21:19:48 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-20 21:19:48 INFO: [load_embeddings] paths shape: (196119,)
2025-08-20 21:19:48 INFO: [load_embeddings] multiplex=False
2025-08-20 21:19:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 21:19:48 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 21:19:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 21:19:51 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 21:19:53 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 21:19:53 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 21:19:54 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-20 21:19:54 INFO: [load_embeddings] labels shape: (141079,)
2025-08-20 21:19:54 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-20 21:19:54 INFO: [load_embeddings] paths shape: (141079,)
2025-08-20 21:19:54 INFO: [load_embeddings] multiplex=False
2025-08-20 21:19:54 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 21:19:54 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 21:19:54 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 21:19:57 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 21:19:59 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 21:20:00 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 21:20:00 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-20 21:20:00 INFO: [load_embeddings] labels shape: (134336,)
2025-08-20 21:20:00 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-20 21:20:00 INFO: [load_embeddings] paths shape: (134336,)
2025-08-20 21:20:00 INFO: [load_embeddings] multiplex=False
2025-08-20 21:20:00 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 21:20:00 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 21:20:00 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 21:20:04 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 21:20:06 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 21:20:08 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 21:20:08 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-20 21:20:08 INFO: [load_embeddings] labels shape: (189079,)
2025-08-20 21:20:08 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-20 21:20:08 INFO: [load_embeddings] paths shape: (189079,)
2025-08-20 21:20:08 INFO: [load_embeddings] multiplex=False
2025-08-20 21:20:08 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 21:20:08 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 21:20:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 21:20:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 21:20:15 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 21:20:17 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 21:20:17 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-20 21:20:17 INFO: [load_embeddings] labels shape: (169304,)
2025-08-20 21:20:17 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-20 21:20:17 INFO: [load_embeddings] paths shape: (169304,)
2025-08-20 21:20:17 INFO: [load_embeddings] multiplex=False
2025-08-20 21:20:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 21:20:17 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 21:20:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-20 21:20:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 21:20:24 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 21:20:25 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 21:20:26 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-20 21:20:26 INFO: [load_embeddings] labels shape: (196652,)
2025-08-20 21:20:26 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-20 21:20:26 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (141079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.94      0.99      0.97      3787
           1       0.88      0.89      0.89      4311
           2       0.86      0.83      0.85      3390
           3       1.00      1.00      1.00     45031
           4       0.96      0.92      0.94      4057
           5       0.84      0.97      0.90       938
           6       0.98      0.85      0.91      3613
           7       0.79      0.95      0.86       953
           8       0.91      0.97      0.94      3228
           9       0.91      0.99      0.95      4216
          10       0.96      0.95      0.95      3856
          11       0.88      0.71      0.78      1375
          12       0.93      0.96      0.95      4171
          13       1.00      0.97      0.98      3624
          14       0.99      0.99      0.99      3952
          15       0.94      0.86      0.90      4542
          16       0.98      0.99      0.99      3866
          17       0.99      1.00      0.99      3757
          18       0.90      0.57      0.69      4292
          19       0.86      0.90      0.88       786
          20       0.91      0.84      0.87      3857
          21       0.96      0.93      0.95      2469
          22       0.99      1.00      1.00      4551
          23       0.54      0.73      0.62      3546
          24       0.85      0.93      0.89      3696
          25       0.99      0.98      0.98      3615
          26       0.81      0.92      0.86      3701
          27       0.89      0.43      0.58      3897
          28       0.64      0.93      0.76      4002

    accuracy                           0.93    141079
   macro avg       0.90      0.89      0.89    141079
weighted avg       0.93      0.93      0.92    141079

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (134336, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.94      0.99      0.96      3867
           1       0.75      0.83      0.79      3424
           2       0.96      0.81      0.88      3987
           3       1.00      1.00      1.00     43178
           4       0.87      0.87      0.87      3476
           5       0.83      0.95      0.88      1754
           6       0.99      0.97      0.98      3703
           7       0.71      0.87      0.79      1846
           8       0.97      0.87      0.91      3826
           9       0.98      0.99      0.99      3833
          10       0.95      0.92      0.93      2963
          11       0.86      0.89      0.87       918
          12       0.93      0.96      0.95      3797
          13       1.00      0.98      0.99      3696
          14       0.94      0.99      0.96      3444
          15       0.97      0.85      0.91      3657
          16       0.94      0.99      0.97      3947
          17       0.99      1.00      0.99      3004
          18       0.88      0.84      0.86      3514
          19       0.85      0.90      0.87      1557
          20       0.73      0.81      0.76      3575
          21       0.93      0.94      0.94      2414
          22       0.99      1.00      0.99      3654
          23       0.51      0.55      0.53      3160
          24       0.88      0.97      0.92      3571
          25       0.98      0.99      0.98      3548
          26       0.70      0.94      0.80      3831
          27       0.79      0.40      0.53      3203
          28       0.67      0.51      0.58      3989

    accuracy                           0.92    134336
   macro avg       0.88      0.88      0.88    134336
weighted avg       0.92      0.92      0.91    134336

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (189079, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.95      0.81      0.88      4789
           1       0.80      0.83      0.81      6170
           2       0.58      0.89      0.70      5999
           3       0.99      1.00      1.00     64486
           4       0.75      0.76      0.76      5786
           5       0.94      0.91      0.92      1726
           6       0.00      0.00      0.00        37
           7       0.77      0.94      0.84      1788
           8       0.38      0.38      0.38      6070
           9       0.92      0.00      0.01      5062
          10       0.84      0.96      0.90      5493
          11       0.18      0.46      0.26      2097
          12       0.52      0.09      0.15      5627
          13       0.00      0.00      0.00      6165
          14       0.80      1.00      0.89      4370
          15       0.87      0.74      0.80      4624
          16       0.94      0.94      0.94      4011
          17       0.45      1.00      0.62      4952
          18       0.81      0.92      0.86      5064
          19       0.70      0.92      0.80      2163
          20       0.83      0.94      0.88      5532
          21       0.80      0.65      0.72      2126
          22       1.00      0.99      1.00      5574
          23       0.59      0.60      0.60      5085
          24       0.73      0.69      0.71      4525
          25       0.82      0.93      0.88      3239
          26       0.98      0.73      0.84      5021
          27       0.72      0.72      0.72      6590
          28       0.68      0.17      0.28      4908

    accuracy                           0.79    189079
   macro avg       0.70      0.69      0.66    189079
weighted avg       0.80      0.79      0.77    189079

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (169304, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.97      0.94      0.95      4070
           1       0.84      0.50      0.63      3949
           2       0.84      0.97      0.90      5874
           3       0.99      1.00      1.00     55763
           4       0.92      0.95      0.94      5741
           5       0.81      0.95      0.87      1933
           6       0.93      0.79      0.85      5849
           7       0.76      0.94      0.84      2165
           8       0.81      0.55      0.65      5637
           9       0.83      0.95      0.89      5508
          10       0.83      0.97      0.89      3647
          11       0.90      0.40      0.55      2717
          12       0.83      0.95      0.89      5484
          13       1.00      0.93      0.96      5848
          14       0.96      1.00      0.98      5741
          15       0.86      0.71      0.78      4699
          16       0.69      0.96      0.81      4075
          17       0.87      1.00      0.93      3375
          18       0.67      0.89      0.76      2263
          19       0.78      0.93      0.85      1816
          20       0.81      0.90      0.85      4069
          21       0.86      0.90      0.88      1510
          22       0.99      1.00      0.99      5255
          23       0.53      0.61      0.57      3269
          24       0.71      0.84      0.77      3766
          25       0.96      0.92      0.94      2990
          26       0.85      0.76      0.80      3809
          27       0.77      0.58      0.66      4782
          28       0.80      0.55      0.66      3700

    accuracy                           0.89    169304
   macro avg       0.84      0.84      0.83    169304
weighted avg       0.89      0.89      0.89    169304

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (196652, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.88      0.38      0.53      3071
           1       0.92      0.88      0.90      6371
           2       0.63      0.98      0.76      6644
           3       1.00      1.00      1.00     63040
           4       0.88      0.95      0.91      6827
           5       0.95      0.97      0.96      2353
           6       0.82      0.99      0.90      6434
           7       0.86      0.95      0.91      2695
           8       0.85      0.99      0.92      6365
           9       0.99      0.77      0.87      5907
          10       0.87      0.97      0.92      5015
          11       0.78      0.39      0.52      2610
          12       0.90      0.91      0.90      6045
          13       1.00      0.95      0.97      6417
          14       0.90      0.99      0.95      6775
          15       0.58      0.63      0.60      3324
          16       0.99      0.89      0.94      4246
          17       0.98      0.99      0.99      4849
          18       0.88      0.90      0.89      6356
          19       0.85      0.83      0.84      2304
          20       0.77      0.95      0.85      5279
          21       0.64      0.62      0.63      2518
          22       0.99      1.00      0.99      3306
          23       0.60      0.48      0.53      4078
          24       0.65      0.55      0.60      4158
          25       0.75      0.99      0.85      3149
          26       0.98      0.72      0.83      5271
          27       0.71      0.75      0.73      5968
          28       0.76      0.27      0.39      5277

    accuracy                           0.89    196652
   macro avg       0.84      0.82      0.81    196652
weighted avg       0.89      0.89      0.88    196652


=== Overall Accuracy ===
0.8817536486928201 [0.9252546445608489, 0.9163292043830396, 0.7899766764156781, 0.8903924301847564, 0.886815287919777]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.995087     0.840584     0.998819 0.945006 0.996160
        CLTC_WT_Untreated  0.989785     0.802353     0.995417 0.840265 0.994069
Calreticulin_WT_Untreated  0.985946     0.909786     0.988397 0.716201 0.997071
        DAPI_WT_Untreated  0.998273     1.000000     0.997434 0.994746 1.000000
       DCP1A_WT_Untreated  0.992550     0.893151     0.995748 0.871110 0.996559
        FMRP_WT_Untreated  0.998071     0.950827     0.998571 0.875767 0.999479
         FUS_WT_Untreated  0.989808     0.898706     0.992014 0.731573 0.997533
       G3BP1_WT_Untreated  0.996291     0.931830     0.997033 0.783255 0.999214
       GM130_WT_Untreated  0.984260     0.723991     0.992381 0.747770 0.991397
     HNRNPA1_WT_Untreated  0.989966     0.726902     0.997971 0.915994 0.991741
       KIF5A_WT_Untreated  0.995565     0.955469     0.996604 0.879372 0.998844
       LAMP1_WT_Untreated  0.988096     0.501081     0.993862 0.491471 0.994092
      LSM14A_WT_Untreated  0.989217     0.752348     0.996606 0.873677 0.992307
         NCL_WT_Untreated  0.991448     0.726252     0.999934 0.997174 0.991316
        NEMO_WT_Untreated  0.996985     0.994440     0.997061 0.910658 0.999832
        NONO_WT_Untreated  0.990468     0.761777     0.996356 0.843335 0.993881
       PEX14_WT_Untreated  0.996047     0.952991     0.997117 0.891520 0.998829
         PML_WT_Untreated  0.991754     0.996439     0.991639 0.745637 0.999912
       PSD95_WT_Untreated  0.991300     0.828610     0.995622 0.834083 0.995448
        PURA_WT_Untreated  0.996436     0.892186     0.997530 0.791281 0.998867
  Phalloidin_WT_Untreated  0.991320     0.897185     0.993919 0.802904 0.997152
        SNCA_WT_Untreated  0.995324     0.802845     0.997917 0.838475 0.997346
         SON_WT_Untreated  0.999706     0.995927     0.999811 0.993170 0.999887
      SQSTM1_WT_Untreated  0.979687     0.593845     0.988789 0.555447 0.990404
       TDP43_WT_Untreated  0.989125     0.782207     0.994157 0.765018 0.994701
        TIA1_WT_Untreated  0.996929     0.962094     0.997637 0.892190 0.999228
      TOMM20_WT_Untreated  0.991265     0.802616     0.996311 0.853344 0.994729
     Tubulin_WT_Untreated  0.982471     0.612684     0.993684 0.746275 0.988319
 mitotracker_WT_Untreated  0.980421     0.459270     0.994521 0.693997 0.985503
            Macro Average  0.991503     0.825807     0.995616 0.821404 0.995649
In [57]:
run_train_test_split_baseline(
    dataset_config,              
    batches=[1,],    
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},       
)
2025-08-19 14:18:26 INFO: [load_embeddings] multiplex=False
2025-08-19 14:18:26 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:18:26 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 14:18:26 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:18:31 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:18:33 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:18:35 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:18:35 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 14:18:35 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 14:18:35 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 14:18:35 INFO: [load_embeddings] paths shape: (196119,)
Train dataset
(156895,) (156895, 192) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
3: 50545
23: 3622
8: 5101
22: 3990
27: 3898
15: 3634
16: 3864
25: 3119
26: 3520
21: 1895
12: 4806
24: 3746
20: 4345
2: 5311
11: 1249
4: 4142
17: 3916
14: 4092
1: 4599
9: 4818
19: 2120
10: 3839
18: 4591
13: 5258
7: 2151
28: 3491
0: 3822
6: 5269
5: 2142
Test dataset
(39224,) (39224, 192) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
17: 979
27: 975
26: 880
3: 12636
1: 1150
6: 1317
14: 1023
24: 937
0: 955
22: 997
4: 1035
18: 1148
28: 873
13: 1315
21: 473
9: 1205
10: 960
16: 966
23: 905
7: 538
15: 908
5: 536
12: 1202
8: 1275
25: 780
2: 1328
19: 530
20: 1086
11: 312
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       955
           1       0.96      0.94      0.95      1150
           2       0.97      0.98      0.98      1328
           3       1.00      1.00      1.00     12636
           4       0.98      0.94      0.96      1035
           5       0.96      0.96      0.96       536
           6       0.99      0.99      0.99      1317
           7       0.95      0.94      0.94       538
           8       0.99      0.99      0.99      1275
           9       0.99      0.99      0.99      1205
          10       0.97      0.96      0.97       960
          11       0.96      0.93      0.95       312
          12       0.98      0.98      0.98      1202
          13       1.00      0.99      0.99      1315
          14       0.98      1.00      0.99      1023
          15       0.97      0.95      0.96       908
          16       0.99      0.99      0.99       966
          17       0.99      1.00      0.99       979
          18       0.90      0.95      0.92      1148
          19       0.94      0.92      0.93       530
          20       0.95      0.96      0.96      1086
          21       0.95      0.96      0.95       473
          22       0.99      0.99      0.99       997
          23       0.73      0.72      0.72       905
          24       0.95      0.96      0.96       937
          25       0.99      0.98      0.99       780
          26       0.95      0.95      0.95       880
          27       0.77      0.76      0.77       975
          28       0.92      0.91      0.92       873

    accuracy                           0.97     39224
   macro avg       0.95      0.95      0.95     39224
weighted avg       0.97      0.97      0.97     39224

Accuracy: 0.9686

=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.999618     0.991623     0.999817 0.992662 0.999791
        CLTC_WT_Untreated  0.997145     0.940870     0.998844 0.960924 0.998215
Calreticulin_WT_Untreated  0.998394     0.981175     0.998997 0.971663 0.999340
        DAPI_WT_Untreated  0.999694     1.000000     0.999549 0.999051 1.000000
       DCP1A_WT_Untreated  0.998011     0.943961     0.999476 0.979940 0.998483
        FMRP_WT_Untreated  0.998929     0.964552     0.999406 0.957407 0.999509
         FUS_WT_Untreated  0.999159     0.985573     0.999631 0.989329 0.999499
       G3BP1_WT_Untreated  0.998470     0.940520     0.999276 0.947566 0.999173
       GM130_WT_Untreated  0.999312     0.989020     0.999657 0.989796 0.999631
     HNRNPA1_WT_Untreated  0.999159     0.985892     0.999579 0.986711 0.999553
       KIF5A_WT_Untreated  0.998368     0.961458     0.999294 0.971579 0.999033
       LAMP1_WT_Untreated  0.999159     0.932692     0.999692 0.960396 0.999460
      LSM14A_WT_Untreated  0.998955     0.983361     0.999448 0.982544 0.999474
         NCL_WT_Untreated  0.999618     0.993156     0.999842 0.995427 0.999763
        NEMO_WT_Untreated  0.999439     0.997067     0.999503 0.981713 0.999921
        NONO_WT_Untreated  0.998088     0.948238     0.999269 0.968504 0.998774
       PEX14_WT_Untreated  0.999465     0.991718     0.999660 0.986612 0.999791
         PML_WT_Untreated  0.999745     0.998979     0.999765 0.990881 0.999974
       PSD95_WT_Untreated  0.995309     0.947735     0.996743 0.897690 0.998422
        PURA_WT_Untreated  0.998113     0.924528     0.999121 0.935115 0.998966
  Phalloidin_WT_Untreated  0.997527     0.957643     0.998663 0.953254 0.998794
        SNCA_WT_Untreated  0.998827     0.955603     0.999355 0.947589 0.999458
         SON_WT_Untreated  0.999541     0.992979     0.999712 0.989011 0.999817
      SQSTM1_WT_Untreated  0.987329     0.721547     0.993606 0.727171 0.993425
       TDP43_WT_Untreated  0.998062     0.964781     0.998877 0.954593 0.999138
        TIA1_WT_Untreated  0.999439     0.982051     0.999792 0.989664 0.999636
      TOMM20_WT_Untreated  0.997731     0.947727     0.998879 0.950969 0.998800
     Tubulin_WT_Untreated  0.988349     0.764103     0.994065 0.766461 0.993987
 mitotracker_WT_Untreated  0.996278     0.912944     0.998175 0.919262 0.998019
            Macro Average  0.997836     0.951776     0.998886 0.953224 0.998891
In [58]:
run_train_test_split_baseline(
    dataset_config,              
    batches=[1,2,3,7,8,9],    
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},       
)
2025-08-19 14:19:50 INFO: [load_embeddings] multiplex=False
2025-08-19 14:19:50 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:19:50 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 14:19:50 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:19:55 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:19:57 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:19:59 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:19:59 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-19 14:19:59 INFO: [load_embeddings] labels shape: (196119,)
2025-08-19 14:19:59 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 14:19:59 INFO: [load_embeddings] paths shape: (196119,)
2025-08-19 14:19:59 INFO: [load_embeddings] multiplex=False
2025-08-19 14:19:59 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:19:59 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 14:19:59 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:20:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:20:04 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:20:05 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:20:05 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-19 14:20:05 INFO: [load_embeddings] labels shape: (141079,)
2025-08-19 14:20:05 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-19 14:20:05 INFO: [load_embeddings] paths shape: (141079,)
2025-08-19 14:20:06 INFO: [load_embeddings] multiplex=False
2025-08-19 14:20:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:20:06 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 14:20:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:20:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:20:11 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:20:12 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:20:12 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-19 14:20:12 INFO: [load_embeddings] labels shape: (134336,)
2025-08-19 14:20:12 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-19 14:20:12 INFO: [load_embeddings] paths shape: (134336,)
2025-08-19 14:20:12 INFO: [load_embeddings] multiplex=False
2025-08-19 14:20:12 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:20:12 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 14:20:12 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:20:17 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:20:19 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:20:20 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:20:21 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-19 14:20:21 INFO: [load_embeddings] labels shape: (189079,)
2025-08-19 14:20:21 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-19 14:20:21 INFO: [load_embeddings] paths shape: (189079,)
2025-08-19 14:20:21 INFO: [load_embeddings] multiplex=False
2025-08-19 14:20:21 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:20:21 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 14:20:21 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:20:26 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:20:28 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:20:30 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:20:30 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-19 14:20:30 INFO: [load_embeddings] labels shape: (169304,)
2025-08-19 14:20:30 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-19 14:20:30 INFO: [load_embeddings] paths shape: (169304,)
2025-08-19 14:20:30 INFO: [load_embeddings] multiplex=False
2025-08-19 14:20:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:20:30 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 14:20:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-19 14:20:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:20:38 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:20:39 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:20:40 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-19 14:20:40 INFO: [load_embeddings] labels shape: (196652,)
2025-08-19 14:20:40 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-19 14:20:40 INFO: [load_embeddings] paths shape: (196652,)
Train dataset
(821255,) (821255, 192) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
4: 24851
24: 19519
3: 267743
1: 23979
8: 25202
23: 18932
16: 19980
9: 24439
28: 20992
10: 20618
6: 20978
19: 9021
0: 19489
22: 21862
14: 23518
13: 25858
7: 9709
25: 16352
2: 26026
21: 10724
15: 20310
20: 22194
18: 21782
26: 20826
27: 23450
17: 19866
11: 9023
12: 24906
5: 9106
Test dataset
(205314,) (205314, 192) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
28: 5248
0: 4872
3: 66936
22: 5465
13: 6465
8: 6300
17: 4966
25: 4088
6: 5244
23: 4733
14: 5879
4: 6213
18: 5446
1: 5995
19: 2255
24: 4880
20: 5549
15: 5078
16: 4995
2: 6507
27: 5863
26: 5207
10: 5155
12: 6226
9: 6110
5: 2276
21: 2681
11: 2255
7: 2427
              precision    recall  f1-score   support

           0       0.98      0.97      0.98      4872
           1       0.96      0.94      0.95      5995
           2       0.93      0.94      0.94      6507
           3       1.00      1.00      1.00     66936
           4       0.97      0.95      0.96      6213
           5       0.96      0.96      0.96      2276
           6       0.94      0.95      0.95      5244
           7       0.95      0.96      0.95      2427
           8       0.98      0.97      0.97      6300
           9       0.96      0.95      0.95      6110
          10       0.97      0.97      0.97      5155
          11       0.97      0.95      0.96      2255
          12       0.97      0.98      0.97      6226
          13       0.99      0.99      0.99      6465
          14       0.98      0.99      0.98      5879
          15       0.93      0.92      0.93      5078
          16       0.99      0.98      0.99      4995
          17       0.99      1.00      0.99      4966
          18       0.92      0.95      0.94      5446
          19       0.95      0.94      0.95      2255
          20       0.93      0.94      0.93      5549
          21       0.94      0.94      0.94      2681
          22       1.00      1.00      1.00      5465
          23       0.70      0.61      0.65      4733
          24       0.92      0.93      0.93      4880
          25       0.98      0.99      0.99      4088
          26       0.95      0.96      0.95      5207
          27       0.74      0.81      0.78      5863
          28       0.92      0.91      0.91      5248

    accuracy                           0.96    205314
   macro avg       0.94      0.94      0.94    205314
weighted avg       0.96      0.96      0.96    205314

Accuracy: 0.9604

=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.999006     0.973933     0.999616 0.984032 0.999367
        CLTC_WT_Untreated  0.996868     0.935780     0.998706 0.956033 0.998070
Calreticulin_WT_Untreated  0.995943     0.939604     0.997787 0.932865 0.998023
        DAPI_WT_Untreated  0.999776     1.000000     0.999668 0.999313 1.000000
       DCP1A_WT_Untreated  0.997686     0.954611     0.999031 0.968485 0.998584
        FMRP_WT_Untreated  0.999206     0.964851     0.999591 0.963581 0.999606
         FUS_WT_Untreated  0.997263     0.954043     0.998396 0.939707 0.998795
       G3BP1_WT_Untreated  0.998865     0.956737     0.999369 0.947755 0.999482
       GM130_WT_Untreated  0.998461     0.973810     0.999241 0.975978 0.999171
     HNRNPA1_WT_Untreated  0.997219     0.945172     0.998815 0.960739 0.998319
       KIF5A_WT_Untreated  0.998432     0.966634     0.999251 0.970777 0.999141
       LAMP1_WT_Untreated  0.999079     0.950333     0.999621 0.965315 0.999449
      LSM14A_WT_Untreated  0.998344     0.976068     0.999041 0.969528 0.999251
         NCL_WT_Untreated  0.999445     0.988708     0.999794 0.993627 0.999633
        NEMO_WT_Untreated  0.999118     0.990304     0.999378 0.979146 0.999714
        NONO_WT_Untreated  0.996483     0.923395     0.998337 0.933692 0.998058
       PEX14_WT_Untreated  0.999299     0.984785     0.999661 0.986365 0.999621
         PML_WT_Untreated  0.999664     0.996979     0.999730 0.989211 0.999925
       PSD95_WT_Untreated  0.996605     0.950239     0.997869 0.923942 0.998643
        PURA_WT_Untreated  0.998851     0.940576     0.999498 0.954116 0.999340
  Phalloidin_WT_Untreated  0.996420     0.942512     0.997918 0.926320 0.998402
        SNCA_WT_Untreated  0.998466     0.937337     0.999275 0.944737 0.999171
         SON_WT_Untreated  0.999805     0.996706     0.999890 0.995977 0.999910
      SQSTM1_WT_Untreated  0.984979     0.613142     0.993753 0.698436 0.990898
       TDP43_WT_Untreated  0.996479     0.927664     0.998154 0.924444 0.998239
        TIA1_WT_Untreated  0.999430     0.988503     0.999652 0.982973 0.999766
      TOMM20_WT_Untreated  0.997565     0.956405     0.998636 0.948030 0.998865
     Tubulin_WT_Untreated  0.986586     0.809995     0.991777 0.743309 0.994400
 mitotracker_WT_Untreated  0.995461     0.906250     0.997801 0.915319 0.997541
            Macro Average  0.997269     0.942934     0.998595 0.943922 0.998599

Cytoself¶

In [4]:
Cytoself_dataset_config = {
    "path_to_embeddings": "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/",
    "multiplexed": False,
    "config_fmt": "newNeuronsD8FigureConfig_UMAP1_B{batch}",
    "config_dir": "manuscript/manuscript_figures_data_config",
}
In [61]:
## Baseline
run_baseline_model(
    dataset_config= Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
)
2025-08-19 14:33:15 INFO: [load_embeddings] multiplex=False
2025-08-19 14:33:15 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:33:15 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 14:33:15 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-08-19 14:33:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:33:43 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:33:46 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:33:47 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-19 14:33:47 INFO: [load_embeddings] labels shape: (192220,)
2025-08-19 14:33:47 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:33:47 INFO: [load_embeddings] paths shape: (192220,)
2025-08-19 14:33:48 INFO: [load_embeddings] multiplex=False
2025-08-19 14:33:48 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:33:48 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 14:33:48 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:34:28 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:34:32 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:34:35 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:34:35 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-19 14:34:35 INFO: [load_embeddings] labels shape: (137464,)
2025-08-19 14:34:35 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:34:35 INFO: [load_embeddings] paths shape: (137464,)
2025-08-19 14:34:36 INFO: [load_embeddings] multiplex=False
2025-08-19 14:34:36 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:34:36 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 14:34:36 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:35:12 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:35:16 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:35:18 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:35:19 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-19 14:35:19 INFO: [load_embeddings] labels shape: (130788,)
2025-08-19 14:35:19 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:35:19 INFO: [load_embeddings] paths shape: (130788,)
2025-08-19 14:35:20 INFO: [load_embeddings] multiplex=False
2025-08-19 14:35:20 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:35:20 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 14:35:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:35:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:35:52 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:35:56 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:35:58 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-19 14:35:58 INFO: [load_embeddings] labels shape: (185840,)
2025-08-19 14:35:58 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:35:58 INFO: [load_embeddings] paths shape: (185840,)
2025-08-19 14:35:59 INFO: [load_embeddings] multiplex=False
2025-08-19 14:35:59 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:35:59 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 14:35:59 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:36:30 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:36:39 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:36:44 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:36:45 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-19 14:36:45 INFO: [load_embeddings] labels shape: (166314,)
2025-08-19 14:36:45 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:36:45 INFO: [load_embeddings] paths shape: (166314,)
2025-08-19 14:36:46 INFO: [load_embeddings] multiplex=False
2025-08-19 14:36:46 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:36:46 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 14:36:46 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:37:16 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:37:23 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:37:27 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:37:28 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-19 14:37:28 INFO: [load_embeddings] labels shape: (193503,)
2025-08-19 14:37:28 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:37:28 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [2, 3, 7, 8, 9], Testing on: [1].

=== Batch [1] ===
Train: (813909, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 19584
CLTC_WT_Untreated: 24225
Calreticulin_WT_Untreated: 25894
DAPI_WT_Untreated: 271498
DCP1A_WT_Untreated: 25887
FMRP_WT_Untreated: 8704
FUS_WT_Untreated: 19636
G3BP1_WT_Untreated: 9447
GM130_WT_Untreated: 25126
HNRNPA1_WT_Untreated: 24526
KIF5A_WT_Untreated: 20974
LAMP1_WT_Untreated: 9717
LSM14A_WT_Untreated: 25124
NCL_WT_Untreated: 25750
NEMO_WT_Untreated: 24282
NONO_WT_Untreated: 20846
PEX14_WT_Untreated: 20145
PML_WT_Untreated: 19937
PSD95_WT_Untreated: 21489
PURA_WT_Untreated: 8626
Phalloidin_WT_Untreated: 22312
SNCA_WT_Untreated: 11037
SON_WT_Untreated: 22340
SQSTM1_WT_Untreated: 19138
TDP43_WT_Untreated: 19716
TOMM20_WT_Untreated: 21633
Tubulin_WT_Untreated: 24440
mitotracker_WT_Untreated: 21876
[W] [14:38:36.025392] L-BFGS: max iterations reached
[W] [14:38:36.026564] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4777
           1       0.94      0.99      0.96      5749
           2       0.99      0.99      0.99      6639
           3       1.00      1.00      1.00     63181
           4       0.94      0.97      0.96      5177
           5       0.97      0.94      0.96      2678
           6       0.96      0.94      0.95      6586
           7       0.99      0.90      0.94      2689
           8       0.98      0.99      0.98      6376
           9       0.96      0.95      0.95      6023
          10       0.95      0.98      0.96      4799
          11       0.95      0.89      0.92      1561
          12       0.98      0.98      0.98      6008
          13       0.99      0.99      0.99      6573
          14       1.00      1.00      1.00      5115
          15       0.93      0.97      0.95      4542
          16       0.98      0.99      0.99      4830
          17       1.00      0.99      0.99      4895
          18       0.77      0.99      0.87      5739
          19       0.95      0.87      0.91      2650
          20       0.96      0.96      0.96      5431
          21       0.98      0.97      0.97      2368
          22       0.99      1.00      0.99      4987
          23       0.82      0.76      0.79      4527
          24       0.96      0.93      0.95      4683
          25       0.96      0.96      0.96      4400
          26       0.81      0.87      0.84      4873
          27       0.96      0.58      0.72      4364

    accuracy                           0.96    192220
   macro avg       0.95      0.94      0.94    192220
weighted avg       0.97      0.96      0.96    192220

Training on Batches: [1, 3, 7, 8, 9], Testing on: [2].

=== Batch [2] ===
Train: (868665, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (137464, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 20574
CLTC_WT_Untreated: 25663
Calreticulin_WT_Untreated: 29143
DAPI_WT_Untreated: 289648
DCP1A_WT_Untreated: 27007
FMRP_WT_Untreated: 10444
FUS_WT_Untreated: 22609
G3BP1_WT_Untreated: 11183
GM130_WT_Untreated: 28274
HNRNPA1_WT_Untreated: 26333
KIF5A_WT_Untreated: 21917
LAMP1_WT_Untreated: 9903
LSM14A_WT_Untreated: 26961
NCL_WT_Untreated: 28699
NEMO_WT_Untreated: 25445
NONO_WT_Untreated: 20846
PEX14_WT_Untreated: 21109
PML_WT_Untreated: 21075
PSD95_WT_Untreated: 22936
PURA_WT_Untreated: 10490
Phalloidin_WT_Untreated: 23886
SNCA_WT_Untreated: 10936
SON_WT_Untreated: 22776
SQSTM1_WT_Untreated: 20119
TDP43_WT_Untreated: 20703
TOMM20_WT_Untreated: 22332
Tubulin_WT_Untreated: 25416
mitotracker_WT_Untreated: 22238
[W] [14:39:41.418421] L-BFGS: max iterations reached
[W] [14:39:41.487878] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.96      1.00      0.98      3787
           1       0.97      0.99      0.98      4311
           2       0.96      0.97      0.96      3390
           3       0.99      1.00      1.00     45031
           4       0.98      0.96      0.97      4057
           5       0.94      0.97      0.96       938
           6       0.95      0.85      0.90      3613
           7       0.90      0.98      0.94       953
           8       0.98      0.99      0.98      3228
           9       0.90      0.96      0.93      4216
          10       0.97      0.96      0.97      3856
          11       0.95      0.93      0.94      1375
          12       0.96      0.98      0.97      4171
          13       0.99      0.98      0.98      3624
          14       1.00      0.99      0.99      3952
          15       0.84      0.90      0.87      4542
          16       0.99      0.99      0.99      3866
          17       1.00      0.99      0.99      3757
          18       0.99      0.95      0.97      4292
          19       0.91      0.97      0.94       786
          20       0.94      0.92      0.93      3857
          21       0.98      0.96      0.97      2469
          22       1.00      1.00      1.00      4551
          23       0.76      0.83      0.79      3546
          24       0.86      0.79      0.82      3696
          25       0.94      0.93      0.94      3701
          26       0.90      0.78      0.83      3897
          27       0.94      0.97      0.96      4002

    accuracy                           0.96    137464
   macro avg       0.94      0.95      0.94    137464
weighted avg       0.96      0.96      0.96    137464

Training on Batches: [1, 2, 7, 8, 9], Testing on: [3].

=== Batch [3] ===
Train: (875341, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (130788, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 20494
CLTC_WT_Untreated: 26550
Calreticulin_WT_Untreated: 28546
DAPI_WT_Untreated: 291501
DCP1A_WT_Untreated: 27588
FMRP_WT_Untreated: 9628
FUS_WT_Untreated: 22519
G3BP1_WT_Untreated: 10290
GM130_WT_Untreated: 27676
HNRNPA1_WT_Untreated: 26716
KIF5A_WT_Untreated: 22810
LAMP1_WT_Untreated: 10360
LSM14A_WT_Untreated: 27335
NCL_WT_Untreated: 28627
NEMO_WT_Untreated: 25953
NONO_WT_Untreated: 21731
PEX14_WT_Untreated: 21028
PML_WT_Untreated: 21828
PSD95_WT_Untreated: 23714
PURA_WT_Untreated: 9719
Phalloidin_WT_Untreated: 24168
SNCA_WT_Untreated: 10991
SON_WT_Untreated: 23673
SQSTM1_WT_Untreated: 20505
TDP43_WT_Untreated: 20828
TOMM20_WT_Untreated: 22202
Tubulin_WT_Untreated: 26110
mitotracker_WT_Untreated: 22251
[W] [14:40:51.721292] L-BFGS: max iterations reached
[W] [14:40:51.722460] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.94      0.99      0.97      3867
           1       0.91      0.92      0.92      3424
           2       0.99      0.97      0.98      3987
           3       1.00      1.00      1.00     43178
           4       0.96      0.93      0.94      3476
           5       0.90      0.94      0.92      1754
           6       0.96      0.96      0.96      3703
           7       0.94      0.90      0.92      1846
           8       0.97      0.99      0.98      3826
           9       0.97      0.97      0.97      3833
          10       0.97      0.93      0.95      2963
          11       0.91      0.92      0.92       918
          12       0.96      0.97      0.97      3797
          13       0.99      0.99      0.99      3696
          14       0.99      0.96      0.97      3444
          15       0.97      0.78      0.86      3657
          16       0.95      0.99      0.97      3947
          17       0.99      0.98      0.99      3004
          18       0.97      0.96      0.97      3514
          19       0.90      0.87      0.88      1557
          20       0.80      0.88      0.84      3575
          21       0.95      0.98      0.96      2414
          22       1.00      0.99      1.00      3654
          23       0.73      0.72      0.72      3160
          24       0.81      0.97      0.89      3571
          25       0.82      0.96      0.89      3831
          26       0.91      0.71      0.80      3203
          27       0.96      0.84      0.89      3989

    accuracy                           0.95    130788
   macro avg       0.93      0.93      0.93    130788
weighted avg       0.95      0.95      0.95    130788

Training on Batches: [1, 2, 3, 8, 9], Testing on: [7].

=== Batch [7] ===
Train: (820289, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (185840, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 19572
CLTC_WT_Untreated: 23804
Calreticulin_WT_Untreated: 26534
DAPI_WT_Untreated: 270193
DCP1A_WT_Untreated: 25278
FMRP_WT_Untreated: 9656
FUS_WT_Untreated: 26185
G3BP1_WT_Untreated: 10348
GM130_WT_Untreated: 25432
HNRNPA1_WT_Untreated: 25487
KIF5A_WT_Untreated: 20280
LAMP1_WT_Untreated: 9181
LSM14A_WT_Untreated: 25505
NCL_WT_Untreated: 26158
NEMO_WT_Untreated: 25027
NONO_WT_Untreated: 20764
PEX14_WT_Untreated: 20964
PML_WT_Untreated: 19880
PSD95_WT_Untreated: 22164
PURA_WT_Untreated: 9113
Phalloidin_WT_Untreated: 22211
SNCA_WT_Untreated: 11279
SON_WT_Untreated: 21753
SQSTM1_WT_Untreated: 18580
TDP43_WT_Untreated: 19874
TOMM20_WT_Untreated: 21012
Tubulin_WT_Untreated: 22723
mitotracker_WT_Untreated: 21332
[W] [14:42:17.417366] L-BFGS: max iterations reached
[W] [14:42:17.427980] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4789
           1       0.97      0.97      0.97      6170
           2       0.94      0.82      0.88      5999
           3       1.00      1.00      1.00     64486
           4       0.94      0.87      0.90      5786
           5       0.98      0.94      0.96      1726
           6       0.00      0.00      0.00        37
           7       0.92      0.98      0.95      1788
           8       0.51      0.56      0.53      6070
           9       0.95      0.13      0.22      5062
          10       0.98      0.98      0.98      5493
          11       0.46      0.98      0.63      2097
          12       0.94      0.94      0.94      5627
          13       0.98      0.20      0.33      6165
          14       0.92      0.99      0.95      4370
          15       0.84      0.96      0.90      4624
          16       0.99      0.99      0.99      4011
          17       0.89      1.00      0.94      4952
          18       0.90      1.00      0.95      5064
          19       0.94      0.96      0.95      2163
          20       0.95      0.96      0.96      5532
          21       0.93      0.95      0.94      2126
          22       0.88      1.00      0.93      5574
          23       0.64      0.77      0.70      5085
          24       0.95      0.77      0.85      4525
          25       0.99      0.95      0.97      5021
          26       0.82      0.85      0.83      6590
          27       0.96      0.94      0.95      4908

    accuracy                           0.90    185840
   macro avg       0.86      0.84      0.82    185840
weighted avg       0.93      0.90      0.89    185840

Training on Batches: [1, 2, 3, 7, 9], Testing on: [8].

=== Batch [8] ===
Train: (839815, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (166314, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 20291
CLTC_WT_Untreated: 26025
Calreticulin_WT_Untreated: 26659
DAPI_WT_Untreated: 278916
DCP1A_WT_Untreated: 25323
FMRP_WT_Untreated: 9449
FUS_WT_Untreated: 20373
G3BP1_WT_Untreated: 9971
GM130_WT_Untreated: 25865
HNRNPA1_WT_Untreated: 25041
KIF5A_WT_Untreated: 22126
LAMP1_WT_Untreated: 8561
LSM14A_WT_Untreated: 25648
NCL_WT_Untreated: 26475
NEMO_WT_Untreated: 23656
NONO_WT_Untreated: 20689
PEX14_WT_Untreated: 20900
PML_WT_Untreated: 21457
PSD95_WT_Untreated: 24965
PURA_WT_Untreated: 9460
Phalloidin_WT_Untreated: 23674
SNCA_WT_Untreated: 11895
SON_WT_Untreated: 22072
SQSTM1_WT_Untreated: 20396
TDP43_WT_Untreated: 20633
TOMM20_WT_Untreated: 22224
Tubulin_WT_Untreated: 24531
mitotracker_WT_Untreated: 22540
[W] [14:43:29.225727] L-BFGS: max iterations reached
[W] [14:43:29.227037] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      4070
           1       0.97      0.91      0.94      3949
           2       0.97      0.98      0.97      5874
           3       0.99      1.00      1.00     55763
           4       0.97      0.98      0.98      5741
           5       0.91      0.96      0.93      1933
           6       0.95      0.39      0.56      5849
           7       0.95      0.95      0.95      2165
           8       0.99      0.73      0.84      5637
           9       0.60      0.98      0.74      5508
          10       0.98      0.98      0.98      3647
          11       0.97      0.94      0.96      2717
          12       0.97      0.98      0.98      5484
          13       0.92      0.98      0.95      5848
          14       0.99      1.00      1.00      5741
          15       0.94      0.95      0.95      4699
          16       0.92      0.98      0.95      4075
          17       0.92      0.99      0.96      3375
          18       0.99      0.97      0.98      2263
          19       0.93      0.95      0.94      1816
          20       0.92      0.93      0.92      4069
          21       0.94      0.98      0.96      1510
          22       0.97      1.00      0.98      5255
          23       0.74      0.76      0.75      3269
          24       0.95      0.92      0.94      3766
          25       0.91      0.94      0.92      3809
          26       0.85      0.83      0.84      4782
          27       0.95      0.94      0.94      3700

    accuracy                           0.94    166314
   macro avg       0.93      0.93      0.92    166314
weighted avg       0.95      0.94      0.94    166314

Training on Batches: [1, 2, 3, 7, 8], Testing on: [9].

=== Batch [9] ===
Train: (812626, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (193503, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 21290
CLTC_WT_Untreated: 23603
Calreticulin_WT_Untreated: 25889
DAPI_WT_Untreated: 271639
DCP1A_WT_Untreated: 24237
FMRP_WT_Untreated: 9029
FUS_WT_Untreated: 19788
G3BP1_WT_Untreated: 9441
GM130_WT_Untreated: 25137
HNRNPA1_WT_Untreated: 24642
KIF5A_WT_Untreated: 20758
LAMP1_WT_Untreated: 8668
LSM14A_WT_Untreated: 25087
NCL_WT_Untreated: 25906
NEMO_WT_Untreated: 22622
NONO_WT_Untreated: 22064
PEX14_WT_Untreated: 20729
PML_WT_Untreated: 19983
PSD95_WT_Untreated: 20872
PURA_WT_Untreated: 8972
Phalloidin_WT_Untreated: 22464
SNCA_WT_Untreated: 10887
SON_WT_Untreated: 24021
SQSTM1_WT_Untreated: 19587
TDP43_WT_Untreated: 20241
TOMM20_WT_Untreated: 20762
Tubulin_WT_Untreated: 23345
mitotracker_WT_Untreated: 20963
[W] [14:44:45.044685] L-BFGS: max iterations reached
[W] [14:44:45.045819] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.99      0.84      0.91      3071
           1       0.99      0.96      0.98      6371
           2       0.98      0.98      0.98      6644
           3       1.00      1.00      1.00     63040
           4       0.97      0.99      0.98      6827
           5       0.99      0.98      0.98      2353
           6       0.89      0.92      0.90      6434
           7       0.96      0.98      0.97      2695
           8       0.99      0.99      0.99      6365
           9       0.94      0.86      0.90      5907
          10       0.96      0.98      0.97      5015
          11       0.96      0.98      0.97      2610
          12       0.99      0.98      0.98      6045
          13       0.99      0.99      0.99      6417
          14       0.99      1.00      0.99      6775
          15       0.84      0.77      0.80      3324
          16       0.99      0.97      0.98      4246
          17       0.99      1.00      0.99      4849
          18       0.99      0.93      0.96      6356
          19       0.97      0.93      0.95      2304
          20       0.93      0.95      0.94      5279
          21       0.87      0.94      0.90      2518
          22       1.00      1.00      1.00      3306
          23       0.80      0.48      0.60      4078
          24       0.83      0.88      0.85      4158
          25       0.98      0.94      0.96      5271
          26       0.72      0.94      0.82      5968
          27       0.88      0.97      0.93      5277

    accuracy                           0.96    193503
   macro avg       0.94      0.93      0.93    193503
weighted avg       0.96      0.96      0.96    193503


=== Overall Accuracy ===
0.9451051678470289 [0.963271251690771, 0.9605569458185416, 0.9519221946967612, 0.8955068876452863, 0.9417968421179215, 0.9575768851128923]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.998684     0.968433     0.999435 0.977016 0.999217
        CLTC_WT_Untreated  0.997746     0.961433     0.998861 0.962847 0.998816
Calreticulin_WT_Untreated  0.997439     0.949928     0.999026 0.970237 0.998328
        DAPI_WT_Untreated  0.998120     0.998473     0.997943 0.995884 0.999238
       DCP1A_WT_Untreated  0.997252     0.951616     0.998706 0.959057 0.998459
        FMRP_WT_Untreated  0.998912     0.955368     0.999410 0.948783 0.999489
         FUS_WT_Untreated  0.989038     0.802647     0.994026 0.782387 0.994715
       G3BP1_WT_Untreated  0.998745     0.944463     0.999407 0.951124 0.999322
       GM130_WT_Untreated  0.991966     0.861660     0.996178 0.879329 0.995531
     HNRNPA1_WT_Untreated  0.989335     0.805296     0.995098 0.837253 0.993910
       KIF5A_WT_Untreated  0.998378     0.970434     0.999113 0.966385 0.999223
       LAMP1_WT_Untreated  0.996608     0.947508     0.997164 0.791145 0.999404
      LSM14A_WT_Untreated  0.998151     0.972890     0.998958 0.967544 0.999134
         NCL_WT_Untreated  0.994034     0.837175     0.999240 0.973381 0.994620
        NEMO_WT_Untreated  0.999170     0.990033     0.999445 0.981718 0.999700
        NONO_WT_Untreated  0.994671     0.898692     0.997155 0.891041 0.997377
       PEX14_WT_Untreated  0.998949     0.986667     0.999262 0.971458 0.999660
         PML_WT_Untreated  0.998828     0.993194     0.998971 0.960659 0.999828
       PSD95_WT_Untreated  0.996603     0.966432     0.997442 0.913110 0.999065
        PURA_WT_Untreated  0.998418     0.920007     0.999306 0.937636 0.999094
  Phalloidin_WT_Untreated  0.996073     0.937318     0.997739 0.921605 0.998222
        SNCA_WT_Untreated  0.998665     0.961209     0.999171 0.939962 0.999476
         SON_WT_Untreated  0.998893     0.997219     0.998940 0.963307 0.999922
      SQSTM1_WT_Untreated  0.987360     0.718149     0.993845 0.737566 0.993215
       TDP43_WT_Untreated  0.994420     0.875241     0.997382 0.892581 0.996901
      TOMM20_WT_Untreated  0.996973     0.949334     0.998238 0.934685 0.998654
     Tubulin_WT_Untreated  0.989865     0.842868     0.994276 0.815466 0.995280
 mitotracker_WT_Untreated  0.995232     0.876944     0.998400 0.936206 0.996710
            Macro Average  0.996019     0.922880     0.997933 0.919978 0.997947
In [4]:
## Baseline
run_baseline_model(
    dataset_config= Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
    train_specific_batches = [1],
    results_csv = 'classification_results-indi.csv'
)
2025-08-20 22:15:17 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/logs/200825_221517_523876_98867_galavir_sysdashboardsysjupyter.log; JOBID: 98867 Username: galavir) JOBNAME: sysdashboardsysjupyter
2025-08-20 22:15:17 INFO: [newNeuronsD8FigureConfig_UMAP1_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input
2025-08-20 22:15:17 INFO: [load_embeddings] multiplex=False
2025-08-20 22:15:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:15:17 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 22:15:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-08-20 22:15:52 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:15:59 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:16:02 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:16:04 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-20 22:16:04 INFO: [load_embeddings] labels shape: (192220,)
2025-08-20 22:16:04 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:16:04 INFO: [load_embeddings] paths shape: (192220,)
2025-08-20 22:16:04 INFO: [load_embeddings] multiplex=False
2025-08-20 22:16:04 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:16:04 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 22:16:04 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:16:28 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:16:33 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:16:35 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:16:36 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-20 22:16:36 INFO: [load_embeddings] labels shape: (137464,)
2025-08-20 22:16:36 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:16:36 INFO: [load_embeddings] paths shape: (137464,)
2025-08-20 22:16:37 INFO: [load_embeddings] multiplex=False
2025-08-20 22:16:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:16:37 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 22:16:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:17:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:17:07 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:17:10 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:17:11 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-20 22:17:11 INFO: [load_embeddings] labels shape: (130788,)
2025-08-20 22:17:11 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:17:11 INFO: [load_embeddings] paths shape: (130788,)
2025-08-20 22:17:11 INFO: [load_embeddings] multiplex=False
2025-08-20 22:17:11 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:17:11 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 22:17:11 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:17:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:17:53 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:17:56 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:17:57 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-20 22:17:57 INFO: [load_embeddings] labels shape: (185840,)
2025-08-20 22:17:57 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:17:57 INFO: [load_embeddings] paths shape: (185840,)
2025-08-20 22:17:58 INFO: [load_embeddings] multiplex=False
2025-08-20 22:17:58 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:17:58 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 22:17:58 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:18:37 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:18:44 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:18:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:18:49 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-20 22:18:49 INFO: [load_embeddings] labels shape: (166314,)
2025-08-20 22:18:49 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:18:49 INFO: [load_embeddings] paths shape: (166314,)
2025-08-20 22:18:50 INFO: [load_embeddings] multiplex=False
2025-08-20 22:18:50 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:18:50 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 22:18:50 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:19:26 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:19:34 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:19:38 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:19:39 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-20 22:19:39 INFO: [load_embeddings] labels shape: (193503,)
2025-08-20 22:19:39 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:19:39 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (137464, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:20:05.026860] L-BFGS: max iterations reached
[W] [22:20:05.045447] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.96      0.99      0.97      3787
           1       0.96      0.97      0.96      4311
           2       0.94      0.95      0.95      3390
           3       0.99      1.00      1.00     45031
           4       0.97      0.94      0.96      4057
           5       0.87      0.96      0.91       938
           6       0.96      0.78      0.86      3613
           7       0.75      0.97      0.85       953
           8       0.99      0.98      0.98      3228
           9       0.86      0.98      0.91      4216
          10       0.97      0.94      0.95      3856
          11       0.92      0.88      0.90      1375
          12       0.95      0.97      0.96      4171
          13       0.99      0.98      0.99      3624
          14       1.00      0.99      0.99      3952
          15       0.93      0.91      0.92      4542
          16       0.97      0.99      0.98      3866
          17       0.99      1.00      0.99      3757
          18       0.99      0.95      0.97      4292
          19       0.85      0.88      0.87       786
          20       0.91      0.91      0.91      3857
          21       0.97      0.96      0.97      2469
          22       0.99      1.00      1.00      4551
          23       0.72      0.75      0.73      3546
          24       0.90      0.91      0.91      3696
          25       0.92      0.93      0.92      3701
          26       0.87      0.75      0.80      3897
          27       0.95      0.97      0.96      4002

    accuracy                           0.96    137464
   macro avg       0.93      0.94      0.93    137464
weighted avg       0.96      0.96      0.96    137464

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (130788, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:20:25.653320] L-BFGS: max iterations reached
[W] [22:20:25.654092] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.96      0.99      0.97      3867
           1       0.92      0.89      0.90      3424
           2       0.98      0.96      0.97      3987
           3       0.99      1.00      1.00     43178
           4       0.94      0.92      0.93      3476
           5       0.85      0.96      0.90      1754
           6       0.98      0.95      0.96      3703
           7       0.77      0.94      0.85      1846
           8       0.98      0.96      0.97      3826
           9       0.95      0.99      0.97      3833
          10       0.96      0.93      0.94      2963
          11       0.86      0.95      0.90       918
          12       0.96      0.96      0.96      3797
          13       0.99      0.99      0.99      3696
          14       0.97      0.97      0.97      3444
          15       0.95      0.90      0.93      3657
          16       0.94      0.99      0.97      3947
          17       0.98      0.98      0.98      3004
          18       0.98      0.95      0.96      3514
          19       0.87      0.81      0.84      1557
          20       0.77      0.88      0.82      3575
          21       0.96      0.97      0.96      2414
          22       1.00      0.99      0.99      3654
          23       0.66      0.67      0.67      3160
          24       0.91      0.95      0.93      3571
          25       0.69      0.94      0.79      3831
          26       0.89      0.64      0.74      3203
          27       0.93      0.58      0.71      3989

    accuracy                           0.94    130788
   macro avg       0.91      0.91      0.91    130788
weighted avg       0.94      0.94      0.94    130788

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (185840, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:20:55.407467] L-BFGS: max iterations reached
[W] [22:20:55.412271] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.99      0.94      0.96      4789
           1       0.95      0.95      0.95      6170
           2       0.57      0.76      0.65      5999
           3       0.99      1.00      0.99     64486
           4       0.89      0.75      0.81      5786
           5       0.94      0.87      0.91      1726
           6       0.00      0.00      0.00        37
           7       0.72      0.93      0.81      1788
           8       0.83      0.44      0.58      6070
           9       0.89      0.24      0.38      5062
          10       0.93      0.97      0.95      5493
          11       0.27      0.80      0.41      2097
          12       0.92      0.81      0.86      5627
          13       0.88      0.03      0.06      6165
          14       0.90      0.99      0.94      4370
          15       0.72      0.86      0.79      4624
          16       0.95      0.96      0.96      4011
          17       0.51      0.99      0.67      4952
          18       0.91      0.98      0.94      5064
          19       0.84      0.75      0.80      2163
          20       0.90      0.93      0.92      5532
          21       0.82      0.79      0.80      2126
          22       0.98      0.99      0.99      5574
          23       0.58      0.65      0.61      5085
          24       0.76      0.59      0.67      4525
          25       0.96      0.91      0.93      5021
          26       0.73      0.86      0.79      6590
          27       0.74      0.18      0.29      4908

    accuracy                           0.84    185840
   macro avg       0.79      0.75      0.73    185840
weighted avg       0.88      0.84      0.83    185840

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (166314, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:21:23.669343] L-BFGS: max iterations reached
[W] [22:21:23.670251] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.98      0.96      0.97      4070
           1       0.94      0.86      0.90      3949
           2       0.86      0.98      0.92      5874
           3       0.99      1.00      0.99     55763
           4       0.94      0.97      0.96      5741
           5       0.87      0.91      0.89      1933
           6       0.92      0.56      0.69      5849
           7       0.75      0.93      0.83      2165
           8       0.99      0.65      0.79      5637
           9       0.64      0.97      0.77      5508
          10       0.94      0.96      0.95      3647
          11       0.94      0.74      0.83      2717
          12       0.93      0.97      0.95      5484
          13       0.96      0.89      0.92      5848
          14       0.99      1.00      0.99      5741
          15       0.85      0.83      0.84      4699
          16       0.77      0.98      0.86      4075
          17       0.89      0.98      0.93      3375
          18       0.98      0.96      0.97      2263
          19       0.78      0.75      0.77      1816
          20       0.86      0.92      0.89      4069
          21       0.93      0.95      0.94      1510
          22       0.97      1.00      0.98      5255
          23       0.68      0.60      0.64      3269
          24       0.82      0.82      0.82      3766
          25       0.79      0.90      0.84      3809
          26       0.76      0.84      0.80      4782
          27       0.93      0.45      0.61      3700

    accuracy                           0.91    166314
   macro avg       0.88      0.87      0.87    166314
weighted avg       0.92      0.91      0.91    166314

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (193503, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [22:21:52.293098] L-BFGS: max iterations reached
[W] [22:21:52.297168] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.98      0.71      0.82      3071
           1       0.97      0.95      0.96      6371
           2       0.74      0.98      0.84      6644
           3       0.99      1.00      0.99     63040
           4       0.91      0.98      0.94      6827
           5       0.97      0.95      0.96      2353
           6       0.89      0.96      0.93      6434
           7       0.83      0.95      0.89      2695
           8       0.99      0.99      0.99      6365
           9       0.89      0.88      0.89      5907
          10       0.90      0.97      0.93      5015
          11       0.91      0.76      0.83      2610
          12       0.95      0.96      0.95      6045
          13       1.00      0.90      0.95      6417
          14       0.97      1.00      0.99      6775
          15       0.55      0.57      0.56      3324
          16       0.96      0.96      0.96      4246
          17       0.96      0.99      0.97      4849
          18       0.98      0.83      0.90      6356
          19       0.85      0.65      0.74      2304
          20       0.87      0.93      0.90      5279
          21       0.79      0.82      0.81      2518
          22       0.99      0.99      0.99      3306
          23       0.67      0.38      0.49      4078
          24       0.65      0.63      0.64      4158
          25       0.95      0.90      0.92      5271
          26       0.67      0.92      0.77      5968
          27       0.64      0.47      0.55      5277

    accuracy                           0.91    193503
   macro avg       0.87      0.86      0.86    193503
weighted avg       0.91      0.91      0.91    193503


=== Overall Accuracy ===
0.9107745683549316 [0.9557920619216668, 0.9395586751078081, 0.8368596642272923, 0.9097851052827783, 0.9118773352351126]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.997599     0.926879     0.999343 0.972047 0.998199
        CLTC_WT_Untreated  0.996551     0.930444     0.998579 0.952582 0.997868
Calreticulin_WT_Untreated  0.988996     0.922801     0.991171 0.774504 0.997447
        DAPI_WT_Untreated  0.996146     0.996335     0.996051 0.992144 0.998162
       DCP1A_WT_Untreated  0.994924     0.912427     0.997635 0.926856 0.997125
        FMRP_WT_Untreated  0.998167     0.927964     0.998926 0.903265 0.999221
         FUS_WT_Untreated  0.988603     0.804492     0.993155 0.743948 0.995157
       G3BP1_WT_Untreated  0.996070     0.941992     0.996705 0.770476 0.999317
       GM130_WT_Untreated  0.992180     0.777402     0.999021 0.961980 0.992952
     HNRNPA1_WT_Untreated  0.988278     0.802821     0.994040 0.807133 0.993875
       KIF5A_WT_Untreated  0.997158     0.956041     0.998246 0.935130 0.998837
       LAMP1_WT_Untreated  0.991436     0.799732     0.993753 0.607347 0.997571
      LSM14A_WT_Untreated  0.996044     0.930664     0.998126 0.940547 0.997792
         NCL_WT_Untreated  0.990524     0.714718     0.999534 0.980449 0.990761
        NEMO_WT_Untreated  0.998680     0.990940     0.998918 0.965725 0.999721
        NONO_WT_Untreated  0.990365     0.826633     0.994669 0.802982 0.995439
       PEX14_WT_Untreated  0.997086     0.975478     0.997634 0.912769 0.999377
         PML_WT_Untreated  0.993026     0.986558     0.993189 0.784344 0.999660
       PSD95_WT_Untreated  0.997014     0.919680     0.999112 0.965603 0.997825
        PURA_WT_Untreated  0.995798     0.747044     0.998463 0.838844 0.997294
  Phalloidin_WT_Untreated  0.993840     0.916413     0.996022 0.866545 0.997640
        SNCA_WT_Untreated  0.997140     0.895714     0.998534 0.893609 0.998566
         SON_WT_Untreated  0.999454     0.994897     0.999583 0.985370 0.999856
      SQSTM1_WT_Untreated  0.983080     0.607378     0.992127 0.650075 0.990561
       TDP43_WT_Untreated  0.989883     0.767803     0.995397 0.805470 0.994242
      TOMM20_WT_Untreated  0.993653     0.912495     0.995869 0.857776 0.997607
     Tubulin_WT_Untreated  0.986436     0.822340     0.991516 0.750037 0.994484
 mitotracker_WT_Untreated  0.984240     0.514628     0.997211 0.835969 0.986735
            Macro Average  0.993299     0.865097     0.996519 0.863697 0.996546
Out[4]:
{'Accuracy': 0.9932990051713398,
 'Sensitivity': 0.8650968982184478,
 'Specificity': 0.9965188218806127,
 'PPV': 0.8636974101979668,
 'NPV': 0.9965460659251094}
In [53]:
## Baseline
run_baseline_model(
    dataset_config= Cytoself_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
    train_specific_batches = [1],
    apply_pca = True
)
2025-08-19 14:01:28 INFO: [load_embeddings] multiplex=False
2025-08-19 14:01:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:01:28 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 14:01:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-08-19 14:01:52 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:01:57 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:02:00 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:02:01 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-19 14:02:01 INFO: [load_embeddings] labels shape: (192220,)
2025-08-19 14:02:01 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:02:01 INFO: [load_embeddings] paths shape: (192220,)
2025-08-19 14:02:02 INFO: [load_embeddings] multiplex=False
2025-08-19 14:02:02 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:02:02 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 14:02:02 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:02:19 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:02:23 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:02:25 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:02:26 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-19 14:02:26 INFO: [load_embeddings] labels shape: (137464,)
2025-08-19 14:02:26 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:02:26 INFO: [load_embeddings] paths shape: (137464,)
2025-08-19 14:02:26 INFO: [load_embeddings] multiplex=False
2025-08-19 14:02:26 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:02:26 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 14:02:26 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:02:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:02:49 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:02:51 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:02:52 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-19 14:02:52 INFO: [load_embeddings] labels shape: (130788,)
2025-08-19 14:02:52 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:02:52 INFO: [load_embeddings] paths shape: (130788,)
2025-08-19 14:02:52 INFO: [load_embeddings] multiplex=False
2025-08-19 14:02:52 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:02:52 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 14:02:52 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:03:18 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:03:23 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:03:26 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:03:28 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-19 14:03:28 INFO: [load_embeddings] labels shape: (185840,)
2025-08-19 14:03:28 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:03:28 INFO: [load_embeddings] paths shape: (185840,)
2025-08-19 14:03:28 INFO: [load_embeddings] multiplex=False
2025-08-19 14:03:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:03:28 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 14:03:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:03:55 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:04:02 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:04:05 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:04:06 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-19 14:04:06 INFO: [load_embeddings] labels shape: (166314,)
2025-08-19 14:04:06 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:04:06 INFO: [load_embeddings] paths shape: (166314,)
2025-08-19 14:04:07 INFO: [load_embeddings] multiplex=False
2025-08-19 14:04:07 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:04:07 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 14:04:07 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:04:32 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:04:38 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:04:42 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:04:43 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-19 14:04:43 INFO: [load_embeddings] labels shape: (193503,)
2025-08-19 14:04:43 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:04:43 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (137464, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:05:20.534433] L-BFGS: max iterations reached
[W] [14:05:20.534632] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      3787
           1       0.89      0.92      0.91      4311
           2       0.86      0.91      0.88      3390
           3       0.97      0.98      0.98     45031
           4       0.93      0.87      0.90      4057
           5       0.75      0.91      0.83       938
           6       0.80      0.63      0.70      3613
           7       0.62      0.91      0.73       953
           8       0.95      0.95      0.95      3228
           9       0.86      0.96      0.90      4216
          10       0.96      0.90      0.93      3856
          11       0.81      0.70      0.75      1375
          12       0.88      0.94      0.91      4171
          13       0.98      0.97      0.97      3624
          14       0.99      0.99      0.99      3952
          15       0.90      0.84      0.87      4542
          16       0.94      0.97      0.96      3866
          17       0.98      0.99      0.98      3757
          18       0.97      0.86      0.91      4292
          19       0.63      0.77      0.70       786
          20       0.84      0.82      0.83      3857
          21       0.95      0.93      0.94      2469
          22       0.98      0.99      0.99      4551
          23       0.58      0.63      0.61      3546
          24       0.83      0.85      0.84      3696
          25       0.80      0.86      0.83      3701
          26       0.86      0.57      0.69      3897
          27       0.86      0.92      0.89      4002

    accuracy                           0.91    137464
   macro avg       0.87      0.88      0.87    137464
weighted avg       0.91      0.91      0.91    137464

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (130788, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:06:07.521268] L-BFGS: max iterations reached
[W] [14:06:07.521472] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      3867
           1       0.78      0.78      0.78      3424
           2       0.94      0.92      0.93      3987
           3       0.98      0.98      0.98     43178
           4       0.86      0.78      0.82      3476
           5       0.74      0.91      0.82      1754
           6       0.86      0.81      0.83      3703
           7       0.64      0.82      0.72      1846
           8       0.96      0.91      0.93      3826
           9       0.90      0.97      0.93      3833
          10       0.94      0.90      0.92      2963
          11       0.72      0.84      0.77       918
          12       0.89      0.93      0.91      3797
          13       0.98      0.98      0.98      3696
          14       0.95      0.95      0.95      3444
          15       0.96      0.89      0.92      3657
          16       0.89      0.97      0.93      3947
          17       0.97      0.97      0.97      3004
          18       0.95      0.93      0.94      3514
          19       0.61      0.73      0.66      1557
          20       0.66      0.76      0.71      3575
          21       0.92      0.93      0.92      2414
          22       0.98      0.98      0.98      3654
          23       0.49      0.51      0.50      3160
          24       0.90      0.94      0.92      3571
          25       0.55      0.88      0.68      3831
          26       0.83      0.40      0.54      3203
          27       0.73      0.26      0.39      3989

    accuracy                           0.89    130788
   macro avg       0.84      0.84      0.83    130788
weighted avg       0.89      0.89      0.88    130788

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (185840, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:06:47.266808] L-BFGS: max iterations reached
[W] [14:06:47.267158] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.98      0.88      0.92      4789
           1       0.92      0.89      0.91      6170
           2       0.55      0.76      0.64      5999
           3       0.97      0.98      0.97     64486
           4       0.69      0.59      0.63      5786
           5       0.92      0.75      0.83      1726
           6       0.00      0.00      0.00        37
           7       0.62      0.84      0.72      1788
           8       0.63      0.39      0.48      6070
           9       0.37      0.10      0.15      5062
          10       0.88      0.95      0.91      5493
          11       0.11      0.35      0.16      2097
          12       0.75      0.54      0.63      5627
          13       0.34      0.00      0.01      6165
          14       0.84      0.98      0.90      4370
          15       0.66      0.87      0.75      4624
          16       0.84      0.93      0.88      4011
          17       0.46      0.99      0.63      4952
          18       0.91      0.96      0.94      5064
          19       0.65      0.49      0.56      2163
          20       0.85      0.87      0.86      5532
          21       0.83      0.61      0.70      2126
          22       0.94      0.99      0.96      5574
          23       0.50      0.60      0.55      5085
          24       0.69      0.45      0.55      4525
          25       0.88      0.81      0.85      5021
          26       0.71      0.80      0.75      6590
          27       0.55      0.12      0.19      4908

    accuracy                           0.78    185840
   macro avg       0.68      0.66      0.64    185840
weighted avg       0.79      0.78      0.77    185840

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (166314, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:07:22.568206] L-BFGS: max iterations reached
[W] [14:07:22.568488] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.96      0.94      0.95      4070
           1       0.90      0.80      0.85      3949
           2       0.83      0.97      0.90      5874
           3       0.96      0.97      0.96     55763
           4       0.82      0.93      0.87      5741
           5       0.83      0.83      0.83      1933
           6       0.82      0.41      0.55      5849
           7       0.66      0.84      0.74      2165
           8       0.96      0.55      0.70      5637
           9       0.56      0.92      0.69      5508
          10       0.90      0.95      0.92      3647
          11       0.79      0.34      0.47      2717
          12       0.73      0.95      0.83      5484
          13       0.94      0.81      0.87      5848
          14       0.98      0.99      0.99      5741
          15       0.82      0.75      0.78      4699
          16       0.84      0.96      0.90      4075
          17       0.84      0.98      0.90      3375
          18       0.95      0.92      0.94      2263
          19       0.57      0.59      0.58      1816
          20       0.82      0.84      0.83      4069
          21       0.92      0.88      0.90      1510
          22       0.96      0.99      0.97      5255
          23       0.56      0.57      0.57      3269
          24       0.74      0.79      0.76      3766
          25       0.64      0.84      0.73      3809
          26       0.73      0.76      0.74      4782
          27       0.80      0.23      0.36      3700

    accuracy                           0.86    166314
   macro avg       0.82      0.80      0.79    166314
weighted avg       0.86      0.86      0.85    166314

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (193503, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
[W] [14:07:58.072874] L-BFGS: max iterations reached
[W] [14:07:58.073265] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.92      0.60      0.73      3071
           1       0.95      0.89      0.92      6371
           2       0.70      0.96      0.81      6644
           3       0.97      0.97      0.97     63040
           4       0.78      0.95      0.86      6827
           5       0.96      0.88      0.92      2353
           6       0.83      0.84      0.84      6434
           7       0.79      0.90      0.84      2695
           8       0.97      0.94      0.95      6365
           9       0.70      0.75      0.72      5907
          10       0.83      0.95      0.89      5015
          11       0.71      0.37      0.49      2610
          12       0.85      0.93      0.89      6045
          13       0.99      0.80      0.88      6417
          14       0.96      1.00      0.98      6775
          15       0.46      0.52      0.49      3324
          16       0.89      0.90      0.90      4246
          17       0.95      0.98      0.97      4849
          18       0.96      0.82      0.89      6356
          19       0.74      0.55      0.63      2304
          20       0.81      0.85      0.83      5279
          21       0.67      0.65      0.66      2518
          22       0.95      0.99      0.97      3306
          23       0.51      0.37      0.43      4078
          24       0.55      0.53      0.54      4158
          25       0.86      0.80      0.83      5271
          26       0.65      0.86      0.74      5968
          27       0.55      0.33      0.42      5277

    accuracy                           0.86    193503
   macro avg       0.80      0.78      0.78    193503
weighted avg       0.86      0.86      0.85    193503


=== Overall Accuracy ===
0.8586465820142102 [0.9125880230460339, 0.8866256843135456, 0.7801657339647008, 0.8551174284786609, 0.8587360402681096]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.996140     0.889910     0.998759 0.946454 0.997290
        CLTC_WT_Untreated  0.993174     0.867121     0.997041 0.899884 0.995928
Calreticulin_WT_Untreated  0.986691     0.904070     0.989406 0.737137 0.996824
        DAPI_WT_Untreated  0.980892     0.975149     0.983767 0.967813 0.987514
       DCP1A_WT_Untreated  0.988134     0.828176     0.993389 0.804495 0.994350
        FMRP_WT_Untreated  0.996744     0.852482     0.998304 0.844525 0.998405
         FUS_WT_Untreated  0.985226     0.668670     0.993052 0.704059 0.991819
       G3BP1_WT_Untreated  0.993596     0.857944     0.995189 0.676827 0.998327
       GM130_WT_Untreated  0.988717     0.716429     0.997391 0.897403 0.991025
     HNRNPA1_WT_Untreated  0.982037     0.722050     0.990115 0.694144 0.991353
       KIF5A_WT_Untreated  0.995389     0.933584     0.997024 0.892439 0.998241
       LAMP1_WT_Untreated  0.984480     0.447257     0.990971 0.374429 0.993306
      LSM14A_WT_Untreated  0.989421     0.848989     0.993894 0.815804 0.995184
         NCL_WT_Untreated  0.988560     0.660350     0.999283 0.967841 0.989017
        NEMO_WT_Untreated  0.997734     0.983815     0.998162 0.942739 0.999502
        NONO_WT_Untreated  0.987743     0.782980     0.993125 0.749610 0.994289
       PEX14_WT_Untreated  0.995479     0.944701     0.996767 0.881187 0.998594
         PML_WT_Untreated  0.991311     0.982946     0.991521 0.744312 0.999568
       PSD95_WT_Untreated  0.995760     0.889944     0.998630 0.946264 0.997020
        PURA_WT_Untreated  0.992192     0.595061     0.996446 0.642026 0.995666
  Phalloidin_WT_Untreated  0.989742     0.834887     0.994107 0.799725 0.995340
        SNCA_WT_Untreated  0.995384     0.797590     0.998103 0.852508 0.997220
         SON_WT_Untreated  0.998571     0.988272     0.998862 0.960790 0.999669
      SQSTM1_WT_Untreated  0.977769     0.536576     0.988393 0.526777 0.988836
       TDP43_WT_Untreated  0.986749     0.696135     0.993964 0.741131 0.992468
      TOMM20_WT_Untreated  0.987554     0.833680     0.991755 0.734115 0.995442
     Tubulin_WT_Untreated  0.983031     0.717308     0.991257 0.717513 0.991249
 mitotracker_WT_Untreated  0.979046     0.363641     0.996043 0.717378 0.982660
            Macro Average  0.989545     0.789990     0.994454 0.792119 0.994504
In [55]:
run_train_test_split_baseline(
    Cytoself_dataset_config,              
    batches=[1,],    
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},       
)
2025-08-19 14:17:33 INFO: [load_embeddings] multiplex=False
2025-08-19 14:17:33 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:17:33 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 14:17:33 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:17:57 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:18:02 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:18:05 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:18:06 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-19 14:18:06 INFO: [load_embeddings] labels shape: (192220,)
2025-08-19 14:18:06 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:18:06 INFO: [load_embeddings] paths shape: (192220,)
Train dataset
(153776,) (153776, 2048) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
1: 4599
22: 3990
12: 4807
23: 3622
13: 5258
26: 3898
18: 4591
3: 50545
27: 3491
14: 4092
8: 5101
5: 2142
10: 3839
4: 4142
7: 2151
16: 3864
9: 4818
21: 1894
24: 3746
19: 2120
2: 5311
0: 3822
6: 5269
20: 4345
17: 3916
25: 3520
15: 3634
11: 1249
Test dataset
(38444,) (38444, 2048) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
1: 1150
18: 1148
3: 12636
15: 908
6: 1317
10: 960
17: 979
24: 937
14: 1023
0: 955
19: 530
13: 1315
23: 905
11: 312
7: 538
26: 975
20: 1086
12: 1201
16: 966
22: 997
25: 880
27: 873
4: 1035
5: 536
9: 1205
2: 1328
8: 1275
21: 474
[W] [14:18:21.983054] L-BFGS: max iterations reached
[W] [14:18:21.995735] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       955
           1       0.98      0.98      0.98      1150
           2       0.99      0.99      0.99      1328
           3       1.00      1.00      1.00     12636
           4       0.97      0.97      0.97      1035
           5       0.97      0.97      0.97       536
           6       0.98      0.97      0.98      1317
           7       0.95      0.96      0.95       538
           8       0.99      0.99      0.99      1275
           9       0.99      0.98      0.98      1205
          10       0.98      0.97      0.98       960
          11       0.96      0.92      0.94       312
          12       0.98      0.99      0.99      1201
          13       1.00      0.99      0.99      1315
          14       0.99      1.00      1.00      1023
          15       0.96      0.95      0.96       908
          16       0.99      0.99      0.99       966
          17       0.99      1.00      0.99       979
          18       0.98      0.98      0.98      1148
          19       0.98      0.95      0.96       530
          20       0.95      0.96      0.95      1086
          21       0.98      0.98      0.98       474
          22       0.99      0.99      0.99       997
          23       0.79      0.81      0.80       905
          24       0.96      0.96      0.96       937
          25       0.97      0.97      0.97       880
          26       0.84      0.83      0.83       975
          27       0.97      0.97      0.97       873

    accuracy                           0.98     38444
   macro avg       0.97      0.96      0.97     38444
weighted avg       0.98      0.98      0.98     38444

Accuracy: 0.9770

=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.999506     0.989529     0.999760 0.990566 0.999733
        CLTC_WT_Untreated  0.998595     0.975652     0.999303 0.977352 0.999249
Calreticulin_WT_Untreated  0.999324     0.990211     0.999650 0.990211 0.999650
        DAPI_WT_Untreated  0.998335     0.998180     0.998411 0.996760 0.999108
       DCP1A_WT_Untreated  0.998439     0.972947     0.999145 0.969201 0.999251
        FMRP_WT_Untreated  0.999220     0.973881     0.999578 0.970260 0.999631
         FUS_WT_Untreated  0.998335     0.974184     0.999192 0.977152 0.999084
       G3BP1_WT_Untreated  0.998725     0.957249     0.999314 0.951941 0.999393
       GM130_WT_Untreated  0.999428     0.994510     0.999596 0.988309 0.999812
     HNRNPA1_WT_Untreated  0.999012     0.980083     0.999624 0.988285 0.999356
       KIF5A_WT_Untreated  0.998803     0.972917     0.999466 0.979036 0.999306
       LAMP1_WT_Untreated  0.999038     0.923077     0.999659 0.956811 0.999371
      LSM14A_WT_Untreated  0.999116     0.988343     0.999463 0.983430 0.999624
         NCL_WT_Untreated  0.999584     0.991635     0.999865 0.996180 0.999704
        NEMO_WT_Untreated  0.999792     0.998045     0.999840 0.994158 0.999947
        NONO_WT_Untreated  0.998049     0.953744     0.999121 0.963293 0.998881
       PEX14_WT_Untreated  0.999558     0.989648     0.999813 0.992731 0.999733
         PML_WT_Untreated  0.999688     0.995914     0.999786 0.991862 0.999893
       PSD95_WT_Untreated  0.998673     0.979965     0.999249 0.975716 0.999383
        PURA_WT_Untreated  0.998986     0.949057     0.999683 0.976699 0.999288
  Phalloidin_WT_Untreated  0.997451     0.955801     0.998662 0.954044 0.998715
        SNCA_WT_Untreated  0.999480     0.978903     0.999737 0.978903 0.999737
         SON_WT_Untreated  0.999740     0.994985     0.999866 0.994985 0.999866
      SQSTM1_WT_Untreated  0.990428     0.808840     0.994805 0.789644 0.995389
       TDP43_WT_Untreated  0.997997     0.962647     0.998880 0.955508 0.999067
      TOMM20_WT_Untreated  0.998543     0.968182     0.999255 0.968182 0.999255
     Tubulin_WT_Untreated  0.991702     0.827692     0.995970 0.842380 0.995518
 mitotracker_WT_Untreated  0.998517     0.966781     0.999255 0.967890 0.999228
            Macro Average  0.998359     0.964736     0.999141 0.966482 0.999149
In [60]:
run_train_test_split_baseline(
    Cytoself_dataset_config,              
    batches=[1,2,3,7,8,9],    
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},       
)
2025-08-19 14:21:18 INFO: [load_embeddings] multiplex=False
2025-08-19 14:21:18 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:21:18 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-19 14:21:18 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:21:31 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:21:36 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:21:38 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:21:39 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-19 14:21:39 INFO: [load_embeddings] labels shape: (192220,)
2025-08-19 14:21:39 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:21:39 INFO: [load_embeddings] paths shape: (192220,)
2025-08-19 14:21:40 INFO: [load_embeddings] multiplex=False
2025-08-19 14:21:40 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:21:40 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-19 14:21:40 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:21:56 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:22:00 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:22:03 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:22:03 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-19 14:22:03 INFO: [load_embeddings] labels shape: (137464,)
2025-08-19 14:22:03 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:22:03 INFO: [load_embeddings] paths shape: (137464,)
2025-08-19 14:22:04 INFO: [load_embeddings] multiplex=False
2025-08-19 14:22:04 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:22:04 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-19 14:22:04 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:22:22 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:22:26 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:22:29 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:22:29 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-19 14:22:29 INFO: [load_embeddings] labels shape: (130788,)
2025-08-19 14:22:29 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:22:29 INFO: [load_embeddings] paths shape: (130788,)
2025-08-19 14:22:30 INFO: [load_embeddings] multiplex=False
2025-08-19 14:22:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:22:30 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-19 14:22:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:22:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:23:00 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:23:03 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:23:04 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-19 14:23:04 INFO: [load_embeddings] labels shape: (185840,)
2025-08-19 14:23:04 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:23:04 INFO: [load_embeddings] paths shape: (185840,)
2025-08-19 14:23:04 INFO: [load_embeddings] multiplex=False
2025-08-19 14:23:04 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:23:04 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-19 14:23:04 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:23:32 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:23:38 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:23:41 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:23:43 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-19 14:23:43 INFO: [load_embeddings] labels shape: (166314,)
2025-08-19 14:23:43 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:23:43 INFO: [load_embeddings] paths shape: (166314,)
2025-08-19 14:23:43 INFO: [load_embeddings] multiplex=False
2025-08-19 14:23:43 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-19 14:23:43 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-19 14:23:43 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-19 14:24:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-19 14:24:15 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-19 14:24:19 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-19 14:24:20 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-19 14:24:20 INFO: [load_embeddings] labels shape: (193503,)
2025-08-19 14:24:20 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-19 14:24:20 INFO: [load_embeddings] paths shape: (193503,)
Train dataset
(804903,) (804903, 2048) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
15: 20310
3: 267743
23: 18932
16: 19980
6: 20978
14: 23518
25: 20826
9: 24439
26: 23450
19: 9021
2: 26026
20: 22194
4: 24851
27: 20992
1: 23979
22: 21862
11: 9023
21: 10724
5: 9106
12: 24906
8: 25202
18: 21782
0: 19489
7: 9709
13: 25858
17: 19866
24: 19519
10: 20618
Test dataset
(201226,) (201226, 2048) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
20: 5549
12: 6226
25: 5207
26: 5863
23: 4733
11: 2255
3: 66936
9: 6110
17: 4966
10: 5155
4: 6213
24: 4880
21: 2681
8: 6300
14: 5879
1: 5995
6: 5244
27: 5248
18: 5446
2: 6507
0: 4872
5: 2276
22: 5465
13: 6465
16: 4995
15: 5078
19: 2255
7: 2427
[W] [14:25:36.989005] L-BFGS: max iterations reached
[W] [14:25:36.996768] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      4872
           1       0.97      0.97      0.97      5995
           2       0.98      0.97      0.97      6507
           3       1.00      1.00      1.00     66936
           4       0.97      0.97      0.97      6213
           5       0.96      0.97      0.97      2276
           6       0.89      0.90      0.90      5244
           7       0.96      0.96      0.96      2427
           8       0.98      0.98      0.98      6300
           9       0.92      0.91      0.91      6110
          10       0.98      0.97      0.98      5155
          11       0.96      0.96      0.96      2255
          12       0.98      0.98      0.98      6226
          13       0.99      0.99      0.99      6465
          14       0.99      0.99      0.99      5879
          15       0.93      0.93      0.93      5078
          16       0.99      0.99      0.99      4995
          17       0.99      1.00      0.99      4966
          18       0.98      0.98      0.98      5446
          19       0.95      0.94      0.95      2255
          20       0.94      0.94      0.94      5549
          21       0.97      0.97      0.97      2681
          22       1.00      1.00      1.00      5465
          23       0.78      0.75      0.77      4733
          24       0.92      0.92      0.92      4880
          25       0.96      0.96      0.96      5207
          26       0.84      0.86      0.85      5863
          27       0.96      0.96      0.96      5248

    accuracy                           0.97    201226
   macro avg       0.95      0.95      0.95    201226
weighted avg       0.97      0.97      0.97    201226

Accuracy: 0.9679

=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.999175     0.981938     0.999603 0.983957 0.999552
        CLTC_WT_Untreated  0.998400     0.974812     0.999124 0.971571 0.999226
Calreticulin_WT_Untreated  0.998350     0.972645     0.999209 0.976246 0.999086
        DAPI_WT_Untreated  0.998504     0.998431     0.998540 0.997076 0.999218
       DCP1A_WT_Untreated  0.998231     0.969902     0.999133 0.972720 0.999041
        FMRP_WT_Untreated  0.999230     0.969684     0.999568 0.962495 0.999653
         FUS_WT_Untreated  0.994722     0.904081     0.997148 0.894528 0.997433
       G3BP1_WT_Untreated  0.999081     0.964153     0.999507 0.959803 0.999562
       GM130_WT_Untreated  0.998882     0.982540     0.999410 0.981761 0.999436
     HNRNPA1_WT_Untreated  0.994817     0.906383     0.997586 0.921618 0.997070
       KIF5A_WT_Untreated  0.998852     0.974394     0.999495 0.980672 0.999327
       LAMP1_WT_Untreated  0.999101     0.956098     0.999588 0.963360 0.999502
      LSM14A_WT_Untreated  0.998842     0.981850     0.999385 0.980748 0.999420
         NCL_WT_Untreated  0.999448     0.990719     0.999738 0.992100 0.999692
        NEMO_WT_Untreated  0.999443     0.992856     0.999642 0.988150 0.999785
        NONO_WT_Untreated  0.996397     0.926152     0.998216 0.930734 0.998088
       PEX14_WT_Untreated  0.999448     0.987788     0.999745 0.989968 0.999689
         PML_WT_Untreated  0.999742     0.995570     0.999847 0.993969 0.999888
       PSD95_WT_Untreated  0.998912     0.982740     0.999362 0.977177 0.999520
        PURA_WT_Untreated  0.998807     0.941907     0.999452 0.951187 0.999342
  Phalloidin_WT_Untreated  0.996601     0.939268     0.998227 0.937579 0.998278
        SNCA_WT_Untreated  0.999240     0.972771     0.999597 0.970238 0.999632
         SON_WT_Untreated  0.999846     0.996706     0.999934 0.997619 0.999908
      SQSTM1_WT_Untreated  0.989276     0.751743     0.994997 0.783528 0.994026
       TDP43_WT_Untreated  0.996114     0.919057     0.998029 0.920567 0.997988
      TOMM20_WT_Untreated  0.997689     0.955445     0.998811 0.955261 0.998816
     Tubulin_WT_Untreated  0.990926     0.857240     0.994938 0.835578 0.995712
 mitotracker_WT_Untreated  0.997818     0.961128     0.998801 0.955484 0.998959
            Macro Average  0.997710     0.953857     0.998808 0.954489 0.998816
In [ ]:
for clf_class, clf_kwargs in additional_classifiers:
    print(f"\n=== Running {clf_class.__name__} ===")
    run_baseline_model(
        dataset_config=Cytoself_dataset_config,
        batches=[1, 2, 3, 7, 8, 9],
        classifier_class=clf_class,
        classifier_kwargs=clf_kwargs,
        train_specific_batches=[1],
        results_csv="classification_results-indi.csv"
    )
2025-08-20 22:21:58 INFO: [load_embeddings] multiplex=False
2025-08-20 22:21:58 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:21:58 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 22:21:58 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
=== Running LinearSVC ===
Loading all batches...
2025-08-20 22:22:33 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:22:39 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:22:43 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:22:44 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-20 22:22:44 INFO: [load_embeddings] labels shape: (192220,)
2025-08-20 22:22:44 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:22:44 INFO: [load_embeddings] paths shape: (192220,)
2025-08-20 22:22:45 INFO: [load_embeddings] multiplex=False
2025-08-20 22:22:45 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:22:45 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 22:22:45 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:23:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:23:13 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:23:16 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:23:17 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-20 22:23:17 INFO: [load_embeddings] labels shape: (137464,)
2025-08-20 22:23:17 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:23:17 INFO: [load_embeddings] paths shape: (137464,)
2025-08-20 22:23:17 INFO: [load_embeddings] multiplex=False
2025-08-20 22:23:17 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:23:17 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 22:23:17 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:23:43 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:23:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:23:50 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:23:51 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-20 22:23:51 INFO: [load_embeddings] labels shape: (130788,)
2025-08-20 22:23:51 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:23:51 INFO: [load_embeddings] paths shape: (130788,)
2025-08-20 22:23:51 INFO: [load_embeddings] multiplex=False
2025-08-20 22:23:51 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:23:51 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 22:23:51 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:24:25 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:24:31 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:24:35 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:24:36 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-20 22:24:36 INFO: [load_embeddings] labels shape: (185840,)
2025-08-20 22:24:36 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:24:36 INFO: [load_embeddings] paths shape: (185840,)
2025-08-20 22:24:37 INFO: [load_embeddings] multiplex=False
2025-08-20 22:24:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:24:37 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 22:24:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:25:15 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:25:22 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:25:25 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:25:26 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-20 22:25:26 INFO: [load_embeddings] labels shape: (166314,)
2025-08-20 22:25:26 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:25:26 INFO: [load_embeddings] paths shape: (166314,)
2025-08-20 22:25:27 INFO: [load_embeddings] multiplex=False
2025-08-20 22:25:27 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:25:27 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 22:25:27 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:26:03 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:26:10 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:26:14 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:26:15 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-20 22:26:15 INFO: [load_embeddings] labels shape: (193503,)
2025-08-20 22:26:15 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:26:15 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (137464, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      3787
           1       0.94      0.95      0.95      4311
           2       0.91      0.95      0.93      3390
           3       1.00      0.99      0.99     45031
           4       0.96      0.91      0.94      4057
           5       0.84      0.94      0.88       938
           6       0.97      0.64      0.78      3613
           7       0.71      0.95      0.81       953
           8       0.97      0.98      0.98      3228
           9       0.66      0.99      0.79      4216
          10       0.96      0.93      0.94      3856
          11       0.90      0.88      0.89      1375
          12       0.95      0.96      0.95      4171
          13       1.00      0.95      0.97      3624
          14       0.99      0.99      0.99      3952
          15       0.95      0.86      0.90      4542
          16       0.96      0.99      0.98      3866
          17       0.99      1.00      0.99      3757
          18       0.98      0.92      0.95      4292
          19       0.77      0.83      0.80       786
          20       0.89      0.85      0.87      3857
          21       0.95      0.96      0.96      2469
          22       0.99      0.98      0.99      4551
          23       0.72      0.68      0.70      3546
          24       0.88      0.89      0.88      3696
          25       0.90      0.90      0.90      3701
          26       0.84      0.80      0.82      3897
          27       0.94      0.96      0.95      4002

    accuracy                           0.94    137464
   macro avg       0.91      0.92      0.91    137464
weighted avg       0.94      0.94      0.94    137464

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (130788, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      3867
           1       0.91      0.86      0.88      3424
           2       0.95      0.96      0.95      3987
           3       1.00      0.98      0.99     43178
           4       0.94      0.88      0.91      3476
           5       0.83      0.94      0.88      1754
           6       0.99      0.91      0.94      3703
           7       0.75      0.89      0.81      1846
           8       0.98      0.96      0.97      3826
           9       0.77      1.00      0.87      3833
          10       0.95      0.90      0.92      2963
          11       0.84      0.94      0.89       918
          12       0.96      0.95      0.95      3797
          13       0.99      0.97      0.98      3696
          14       0.95      0.97      0.96      3444
          15       0.97      0.86      0.91      3657
          16       0.93      0.99      0.96      3947
          17       0.97      0.99      0.98      3004
          18       0.97      0.94      0.95      3514
          19       0.82      0.73      0.77      1557
          20       0.72      0.79      0.75      3575
          21       0.93      0.96      0.95      2414
          22       0.99      0.98      0.99      3654
          23       0.64      0.63      0.64      3160
          24       0.88      0.95      0.91      3571
          25       0.73      0.91      0.81      3831
          26       0.76      0.67      0.72      3203
          27       0.93      0.62      0.74      3989

    accuracy                           0.93    130788
   macro avg       0.89      0.90      0.89    130788
weighted avg       0.93      0.93      0.92    130788

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (185840, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
              precision    recall  f1-score   support

           0       0.99      0.93      0.96      4789
           1       0.94      0.92      0.93      6170
           2       0.54      0.76      0.63      5999
           3       0.99      0.99      0.99     64486
           4       0.89      0.68      0.77      5786
           5       0.92      0.84      0.87      1726
           6       0.00      0.00      0.00        37
           7       0.68      0.84      0.75      1788
           8       0.81      0.46      0.58      6070
           9       0.62      0.30      0.40      5062
          10       0.94      0.95      0.95      5493
          11       0.28      0.83      0.42      2097
          12       0.90      0.75      0.82      5627
          13       0.65      0.01      0.01      6165
          14       0.84      0.99      0.91      4370
          15       0.77      0.83      0.80      4624
          16       0.95      0.97      0.96      4011
          17       0.50      0.99      0.66      4952
          18       0.89      0.96      0.92      5064
          19       0.83      0.73      0.78      2163
          20       0.89      0.88      0.89      5532
          21       0.60      0.77      0.68      2126
          22       0.99      0.98      0.99      5574
          23       0.51      0.49      0.50      5085
          24       0.73      0.66      0.69      4525
          25       0.96      0.86      0.90      5021
          26       0.65      0.92      0.76      6590
          27       0.58      0.11      0.19      4908

    accuracy                           0.82    185840
   macro avg       0.74      0.73      0.70    185840
weighted avg       0.85      0.82      0.81    185840

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (166314, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
              precision    recall  f1-score   support

           0       0.97      0.96      0.97      4070
           1       0.93      0.79      0.86      3949
           2       0.78      0.98      0.87      5874
           3       0.99      0.97      0.98     55763
           4       0.94      0.95      0.95      5741
           5       0.84      0.88      0.86      1933
           6       0.92      0.42      0.58      5849
           7       0.75      0.89      0.81      2165
           8       0.97      0.70      0.82      5637
           9       0.44      0.99      0.61      5508
          10       0.94      0.95      0.94      3647
          11       0.93      0.75      0.83      2717
          12       0.95      0.95      0.95      5484
          13       0.95      0.69      0.80      5848
          14       0.98      0.99      0.99      5741
          15       0.88      0.79      0.83      4699
          16       0.81      0.97      0.88      4075
          17       0.88      0.98      0.93      3375
          18       0.96      0.93      0.94      2263
          19       0.80      0.75      0.77      1816
          20       0.87      0.86      0.86      4069
          21       0.88      0.94      0.91      1510
          22       0.96      0.99      0.98      5255
          23       0.57      0.45      0.50      3269
          24       0.82      0.86      0.84      3766
          25       0.83      0.87      0.85      3809
          26       0.70      0.89      0.78      4782
          27       0.91      0.42      0.57      3700

    accuracy                           0.88    166314
   macro avg       0.86      0.84      0.84    166314
weighted avg       0.90      0.88      0.88    166314

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (193503, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
              precision    recall  f1-score   support

           0       0.98      0.72      0.83      3071
           1       0.97      0.93      0.95      6371
           2       0.68      0.98      0.80      6644
           3       0.99      0.97      0.98     63040
           4       0.92      0.96      0.94      6827
           5       0.96      0.94      0.95      2353
           6       0.92      0.97      0.94      6434
           7       0.79      0.92      0.85      2695
           8       0.98      0.99      0.98      6365
           9       0.64      0.94      0.76      5907
          10       0.93      0.95      0.94      5015
          11       0.90      0.84      0.87      2610
          12       0.97      0.94      0.96      6045
          13       1.00      0.74      0.85      6417
          14       0.96      1.00      0.98      6775
          15       0.59      0.53      0.56      3324
          16       0.96      0.96      0.96      4246
          17       0.96      0.99      0.97      4849
          18       0.97      0.79      0.87      6356
          19       0.85      0.62      0.72      2304
          20       0.86      0.88      0.87      5279
          21       0.76      0.82      0.79      2518
          22       0.98      0.98      0.98      3306
          23       0.54      0.25      0.34      4078
          24       0.66      0.68      0.67      4158
          25       0.95      0.86      0.91      5271
          26       0.62      0.95      0.75      5968
          27       0.56      0.35      0.43      5277

    accuracy                           0.89    193503
   macro avg       0.85      0.84      0.84    193503
weighted avg       0.90      0.89      0.89    193503


=== Overall Accuracy ===
0.8914008707205114 [0.9390967817028458, 0.9251842676698168, 0.8206360309944037, 0.8810623278858064, 0.8910249453496845]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.997492     0.926930     0.999232 0.967489 0.998200
        CLTC_WT_Untreated  0.995444     0.901176     0.998336 0.943227 0.996973
Calreticulin_WT_Untreated  0.986302     0.919286     0.988504 0.724340 0.997324
        DAPI_WT_Untreated  0.990922     0.979326     0.996726 0.993365 0.989724
       DCP1A_WT_Untreated  0.993878     0.876656     0.997728 0.926891 0.995955
        FMRP_WT_Untreated  0.997693     0.906135     0.998682 0.881426 0.998985
         FUS_WT_Untreated  0.988392     0.732634     0.994715 0.774107 0.993399
       G3BP1_WT_Untreated  0.995185     0.895311     0.996358 0.742712 0.998768
       GM130_WT_Untreated  0.992218     0.789979     0.998660 0.949440 0.993346
     HNRNPA1_WT_Untreated  0.977525     0.836500     0.981906 0.589557 0.994853
       KIF5A_WT_Untreated  0.996927     0.940212     0.998427 0.940526 0.998419
       LAMP1_WT_Untreated  0.991675     0.828342     0.993648 0.611766 0.997917
      LSM14A_WT_Untreated  0.995464     0.905588     0.998327 0.945165 0.996997
         NCL_WT_Untreated  0.987508     0.615612     0.999659 0.983314 0.987593
        NEMO_WT_Untreated  0.998012     0.990404     0.998246 0.945545 0.999704
        NONO_WT_Untreated  0.990582     0.786002     0.995960 0.836439 0.994384
       PEX14_WT_Untreated  0.997217     0.975180     0.997776 0.917562 0.999369
         PML_WT_Untreated  0.992674     0.989166     0.992762 0.774344 0.999726
       PSD95_WT_Untreated  0.995894     0.893387     0.998674 0.948096 0.997113
        PURA_WT_Untreated  0.995298     0.712149     0.998331 0.820489 0.996921
  Phalloidin_WT_Untreated  0.991936     0.858148     0.995707 0.849279 0.996001
        SNCA_WT_Untreated  0.995786     0.889010     0.997254 0.816510 0.998472
         SON_WT_Untreated  0.999091     0.984154     0.999512 0.982746 0.999553
      SQSTM1_WT_Untreated  0.980144     0.491013     0.991922 0.594108 0.987795
       TDP43_WT_Untreated  0.989904     0.797068     0.994691 0.788470 0.994961
      TOMM20_WT_Untreated  0.993295     0.877687     0.996452 0.871043 0.996660
     Tubulin_WT_Untreated  0.983875     0.869681     0.987411 0.681381 0.995931
 mitotracker_WT_Untreated  0.982718     0.469693     0.996888 0.806515 0.985520
            Macro Average  0.991895     0.844158     0.995803 0.843066 0.995734
2025-08-20 22:41:32 INFO: [load_embeddings] multiplex=False
2025-08-20 22:41:32 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:41:32 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 22:41:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
=== Running RandomForestClassifier ===
Loading all batches...
2025-08-20 22:42:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:42:14 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:42:18 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:42:19 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-20 22:42:19 INFO: [load_embeddings] labels shape: (192220,)
2025-08-20 22:42:19 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:42:19 INFO: [load_embeddings] paths shape: (192220,)
2025-08-20 22:42:20 INFO: [load_embeddings] multiplex=False
2025-08-20 22:42:20 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:42:20 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 22:42:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:42:45 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:42:49 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:42:51 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:42:52 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-20 22:42:52 INFO: [load_embeddings] labels shape: (137464,)
2025-08-20 22:42:52 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:42:52 INFO: [load_embeddings] paths shape: (137464,)
2025-08-20 22:42:53 INFO: [load_embeddings] multiplex=False
2025-08-20 22:42:53 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:42:53 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 22:42:53 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:43:19 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:43:24 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:43:26 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:43:27 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-20 22:43:27 INFO: [load_embeddings] labels shape: (130788,)
2025-08-20 22:43:27 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:43:27 INFO: [load_embeddings] paths shape: (130788,)
2025-08-20 22:43:28 INFO: [load_embeddings] multiplex=False
2025-08-20 22:43:28 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:43:28 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 22:43:28 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:44:02 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:44:08 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:44:12 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:44:13 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-20 22:44:13 INFO: [load_embeddings] labels shape: (185840,)
2025-08-20 22:44:13 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:44:13 INFO: [load_embeddings] paths shape: (185840,)
2025-08-20 22:44:14 INFO: [load_embeddings] multiplex=False
2025-08-20 22:44:14 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:44:14 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 22:44:14 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:44:54 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:45:01 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:45:04 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:45:05 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-20 22:45:05 INFO: [load_embeddings] labels shape: (166314,)
2025-08-20 22:45:05 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:45:05 INFO: [load_embeddings] paths shape: (166314,)
2025-08-20 22:45:06 INFO: [load_embeddings] multiplex=False
2025-08-20 22:45:06 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 22:45:06 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 22:45:06 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 22:45:44 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 22:45:52 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 22:45:56 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 22:45:57 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-20 22:45:57 INFO: [load_embeddings] labels shape: (193503,)
2025-08-20 22:45:57 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 22:45:57 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (137464, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
              precision    recall  f1-score   support

           0       0.82      0.97      0.89      3787
           1       0.87      0.90      0.89      4311
           2       0.78      0.84      0.81      3390
           3       0.93      1.00      0.96     45031
           4       0.92      0.86      0.89      4057
           5       0.80      0.85      0.83       938
           6       0.99      0.31      0.47      3613
           7       0.59      0.88      0.71       953
           8       0.87      0.98      0.92      3228
           9       0.94      0.92      0.93      4216
          10       0.96      0.82      0.88      3856
          11       0.79      0.38      0.52      1375
          12       0.87      0.93      0.90      4171
          13       0.99      0.98      0.98      3624
          14       0.97      1.00      0.99      3952
          15       0.91      0.72      0.81      4542
          16       0.92      0.99      0.95      3866
          17       0.94      1.00      0.97      3757
          18       0.95      0.87      0.91      4292
          19       0.81      0.62      0.70       786
          20       0.78      0.85      0.81      3857
          21       0.99      0.82      0.90      2469
          22       1.00      0.99      0.99      4551
          23       0.63      0.54      0.58      3546
          24       0.73      0.84      0.78      3696
          25       0.75      0.85      0.80      3701
          26       0.87      0.63      0.73      3897
          27       0.87      0.90      0.88      4002

    accuracy                           0.89    137464
   macro avg       0.87      0.83      0.83    137464
weighted avg       0.90      0.89      0.89    137464

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (130788, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
              precision    recall  f1-score   support

           0       0.88      0.97      0.93      3867
           1       0.81      0.83      0.82      3424
           2       0.90      0.87      0.89      3987
           3       0.95      1.00      0.97     43178
           4       0.89      0.77      0.83      3476
           5       0.83      0.85      0.84      1754
           6       0.99      0.54      0.70      3703
           7       0.61      0.84      0.71      1846
           8       0.90      0.96      0.93      3826
           9       0.98      0.94      0.96      3833
          10       0.96      0.86      0.91      2963
          11       0.71      0.66      0.68       918
          12       0.91      0.91      0.91      3797
          13       0.99      0.99      0.99      3696
          14       0.91      0.99      0.95      3444
          15       0.97      0.83      0.90      3657
          16       0.87      0.99      0.93      3947
          17       0.93      1.00      0.96      3004
          18       0.92      0.92      0.92      3514
          19       0.79      0.51      0.62      1557
          20       0.60      0.83      0.70      3575
          21       0.97      0.85      0.91      2414
          22       1.00      0.99      0.99      3654
          23       0.52      0.47      0.49      3160
          24       0.86      0.91      0.89      3571
          25       0.50      0.88      0.64      3831
          26       0.84      0.43      0.57      3203
          27       0.58      0.11      0.19      3989

    accuracy                           0.87    130788
   macro avg       0.84      0.81      0.81    130788
weighted avg       0.88      0.87      0.87    130788

Training on Batches: [1], Testing on: [7].

=== Batch [7] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (185840, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
              precision    recall  f1-score   support

           0       0.97      0.86      0.91      4789
           1       0.88      0.87      0.87      6170
           2       0.52      0.87      0.65      5999
           3       0.94      1.00      0.97     64486
           4       0.41      0.17      0.24      5786
           5       0.96      0.61      0.75      1726
           6       0.00      0.00      0.00        37
           7       0.56      0.69      0.62      1788
           8       0.39      0.37      0.38      6070
           9       0.87      0.02      0.04      5062
          10       0.90      0.92      0.91      5493
          11       0.03      0.05      0.04      2097
          12       0.49      0.13      0.21      5627
          13       0.15      0.00      0.00      6165
          14       0.54      1.00      0.70      4370
          15       0.60      0.87      0.71      4624
          16       0.90      0.96      0.93      4011
          17       0.43      0.98      0.60      4952
          18       0.56      0.95      0.71      5064
          19       0.89      0.36      0.51      2163
          20       0.76      0.91      0.83      5532
          21       0.93      0.43      0.59      2126
          22       1.00      0.99      0.99      5574
          23       0.53      0.40      0.45      5085
          24       0.75      0.32      0.45      4525
          25       0.82      0.79      0.81      5021
          26       0.65      0.90      0.75      6590
          27       0.06      0.01      0.01      4908

    accuracy                           0.74    185840
   macro avg       0.62      0.59      0.56    185840
weighted avg       0.73      0.74      0.71    185840

Training on Batches: [1], Testing on: [8].

=== Batch [8] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (166314, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
              precision    recall  f1-score   support

           0       0.94      0.91      0.93      4070
           1       0.81      0.75      0.78      3949
           2       0.75      0.97      0.84      5874
           3       0.90      1.00      0.94     55763
           4       0.80      0.92      0.86      5741
           5       0.89      0.73      0.80      1933
           6       0.91      0.15      0.25      5849
           7       0.61      0.69      0.64      2165
           8       0.89      0.48      0.63      5637
           9       0.74      0.86      0.80      5508
          10       0.96      0.93      0.94      3647
          11       0.59      0.09      0.15      2717
          12       0.64      0.92      0.75      5484
          13       0.94      0.83      0.88      5848
          14       0.96      1.00      0.98      5741
          15       0.79      0.65      0.71      4699
          16       0.82      0.97      0.89      4075
          17       0.85      0.98      0.91      3375
          18       0.85      0.90      0.88      2263
          19       0.85      0.46      0.60      1816
          20       0.76      0.87      0.81      4069
          21       0.97      0.76      0.85      1510
          22       0.98      1.00      0.99      5255
          23       0.57      0.39      0.46      3269
          24       0.76      0.75      0.76      3766
          25       0.55      0.80      0.65      3809
          26       0.70      0.83      0.76      4782
          27       0.48      0.06      0.10      3700

    accuracy                           0.83    166314
   macro avg       0.79      0.74      0.73    166314
weighted avg       0.83      0.83      0.81    166314

Training on Batches: [1], Testing on: [9].

=== Batch [9] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (193503, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
              precision    recall  f1-score   support

           0       0.92      0.62      0.74      3071
           1       0.90      0.91      0.90      6371
           2       0.58      0.98      0.73      6644
           3       0.92      1.00      0.96     63040
           4       0.78      0.92      0.84      6827
           5       0.98      0.76      0.85      2353
           6       0.92      0.60      0.73      6434
           7       0.78      0.84      0.81      2695
           8       0.92      0.96      0.94      6365
           9       0.94      0.66      0.78      5907
          10       0.85      0.94      0.89      5015
          11       0.44      0.07      0.12      2610
          12       0.80      0.87      0.83      6045
          13       1.00      0.82      0.90      6417
          14       0.93      1.00      0.96      6775
          15       0.41      0.52      0.46      3324
          16       0.90      0.96      0.93      4246
          17       0.94      0.99      0.96      4849
          18       0.85      0.88      0.87      6356
          19       0.93      0.40      0.56      2304
          20       0.72      0.91      0.80      5279
          21       0.79      0.48      0.59      2518
          22       0.98      0.99      0.99      3306
          23       0.54      0.22      0.31      4078
          24       0.56      0.40      0.47      4158
          25       0.79      0.71      0.75      5271
          26       0.63      0.92      0.75      5968
          27       0.33      0.10      0.15      5277

    accuracy                           0.84    193503
   macro avg       0.79      0.73      0.73    193503
weighted avg       0.83      0.84      0.82    193503


=== Overall Accuracy ===
0.836347164574688 [0.893172030495257, 0.8746291708719455, 0.7441239776151528, 0.83137919838378, 0.8384314455073048]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV
      ANXA11_WT_Untreated  0.994766     0.876379     0.997685 0.903221 0.996954
        CLTC_WT_Untreated  0.991804     0.860681     0.995826 0.863497 0.995727
Calreticulin_WT_Untreated  0.981796     0.918282     0.983884 0.651845 0.997278
        DAPI_WT_Untreated  0.972702     0.999090     0.959494 0.925070 0.999526
       DCP1A_WT_Untreated  0.984778     0.721752     0.993419 0.782740 0.990883
        FMRP_WT_Untreated  0.996398     0.750000     0.999061 0.896211 0.997302
         FUS_WT_Untreated  0.983407     0.397382     0.997895 0.823536 0.985290
       G3BP1_WT_Untreated  0.992369     0.779930     0.994864 0.640696 0.997409
       GM130_WT_Untreated  0.984615     0.712847     0.993272 0.771427 0.990875
     HNRNPA1_WT_Untreated  0.987039     0.661013     0.997169 0.878842 0.989548
       KIF5A_WT_Untreated  0.995185     0.899352     0.997720 0.912534 0.997339
       LAMP1_WT_Untreated  0.984808     0.170423     0.994648 0.277852 0.990023
      LSM14A_WT_Untreated  0.984426     0.731810     0.992472 0.755879 0.991466
         NCL_WT_Untreated  0.989144     0.673553     0.999454 0.975807 0.989441
        NEMO_WT_Untreated  0.994012     0.998600     0.993871 0.833608 0.999957
        NONO_WT_Untreated  0.985179     0.727094     0.991963 0.703962 0.992820
       PEX14_WT_Untreated  0.996076     0.972748     0.996668 0.881075 0.999307
         PML_WT_Untreated  0.990250     0.986758     0.990337 0.719437 0.999664
       PSD95_WT_Untreated  0.990644     0.904323     0.992985 0.777569 0.997394
        PURA_WT_Untreated  0.993294     0.441456     0.999205 0.856115 0.994048
  Phalloidin_WT_Untreated  0.987427     0.877958     0.990513 0.722868 0.996539
        SNCA_WT_Untreated  0.994851     0.664673     0.999390 0.937388 0.995409
         SON_WT_Untreated  0.999538     0.991853     0.999755 0.991321 0.999770
      SQSTM1_WT_Untreated  0.978334     0.396227     0.992351 0.555043 0.985561
       TDP43_WT_Untreated  0.985583     0.623250     0.994578 0.740509 0.990684
      TOMM20_WT_Untreated  0.983939     0.799519     0.988975 0.664439 0.994495
     Tubulin_WT_Untreated  0.982800     0.785966     0.988894 0.686600 0.993344
 mitotracker_WT_Untreated  0.975874     0.219236     0.996773 0.652339 0.978824
            Macro Average  0.987894     0.733648     0.993326 0.777908 0.993817
2025-08-20 23:49:52 INFO: [load_embeddings] multiplex=False
2025-08-20 23:49:52 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 23:49:52 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-20 23:49:52 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
=== Running ExtraTreesClassifier ===
Loading all batches...
2025-08-20 23:50:28 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 23:50:35 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 23:50:39 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 23:50:40 INFO: [load_embeddings] embeddings shape: (192220, 2048)
2025-08-20 23:50:40 INFO: [load_embeddings] labels shape: (192220,)
2025-08-20 23:50:40 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 23:50:40 INFO: [load_embeddings] paths shape: (192220,)
2025-08-20 23:50:40 INFO: [load_embeddings] multiplex=False
2025-08-20 23:50:40 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 23:50:40 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-20 23:50:40 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 23:51:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 23:51:10 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 23:51:13 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 23:51:13 INFO: [load_embeddings] embeddings shape: (137464, 2048)
2025-08-20 23:51:13 INFO: [load_embeddings] labels shape: (137464,)
2025-08-20 23:51:13 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 23:51:13 INFO: [load_embeddings] paths shape: (137464,)
2025-08-20 23:51:14 INFO: [load_embeddings] multiplex=False
2025-08-20 23:51:14 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 23:51:14 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-20 23:51:14 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 23:51:40 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 23:51:45 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 23:51:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 23:51:49 INFO: [load_embeddings] embeddings shape: (130788, 2048)
2025-08-20 23:51:49 INFO: [load_embeddings] labels shape: (130788,)
2025-08-20 23:51:49 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 23:51:49 INFO: [load_embeddings] paths shape: (130788,)
2025-08-20 23:51:49 INFO: [load_embeddings] multiplex=False
2025-08-20 23:51:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 23:51:49 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-20 23:51:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 23:52:24 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 23:52:31 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 23:52:35 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 23:52:36 INFO: [load_embeddings] embeddings shape: (185840, 2048)
2025-08-20 23:52:36 INFO: [load_embeddings] labels shape: (185840,)
2025-08-20 23:52:36 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 23:52:36 INFO: [load_embeddings] paths shape: (185840,)
2025-08-20 23:52:37 INFO: [load_embeddings] multiplex=False
2025-08-20 23:52:37 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 23:52:37 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-20 23:52:37 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 23:53:16 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 23:53:23 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 23:53:27 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 23:53:28 INFO: [load_embeddings] embeddings shape: (166314, 2048)
2025-08-20 23:53:28 INFO: [load_embeddings] labels shape: (166314,)
2025-08-20 23:53:28 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 23:53:28 INFO: [load_embeddings] paths shape: (166314,)
2025-08-20 23:53:29 INFO: [load_embeddings] multiplex=False
2025-08-20 23:53:29 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-20 23:53:29 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-20 23:53:29 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-08-20 23:54:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-20 23:54:12 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-20 23:54:16 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-20 23:54:17 INFO: [load_embeddings] embeddings shape: (193503, 2048)
2025-08-20 23:54:17 INFO: [load_embeddings] labels shape: (193503,)
2025-08-20 23:54:17 INFO: [load_embeddings] example label: ANXA11_WT_Untreated
2025-08-20 23:54:17 INFO: [load_embeddings] paths shape: (193503,)
Batches loaded.
Training on Batches: [1], Testing on: [2].

=== Batch [2] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (137464, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364
              precision    recall  f1-score   support

           0       0.81      0.97      0.88      3787
           1       0.90      0.91      0.90      4311
           2       0.74      0.87      0.80      3390
           3       0.92      1.00      0.96     45031
           4       0.92      0.86      0.89      4057
           5       0.84      0.83      0.84       938
           6       0.99      0.20      0.33      3613
           7       0.61      0.88      0.72       953
           8       0.85      0.98      0.91      3228
           9       0.95      0.88      0.91      4216
          10       0.97      0.82      0.89      3856
          11       0.83      0.32      0.46      1375
          12       0.86      0.93      0.90      4171
          13       0.99      0.98      0.98      3624
          14       0.97      1.00      0.98      3952
          15       0.92      0.72      0.80      4542
          16       0.92      0.98      0.95      3866
          17       0.94      1.00      0.97      3757
          18       0.95      0.88      0.91      4292
          19       0.89      0.52      0.65       786
          20       0.77      0.87      0.82      3857
          21       0.99      0.78      0.87      2469
          22       1.00      0.99      0.99      4551
          23       0.64      0.48      0.55      3546
          24       0.74      0.81      0.78      3696
          25       0.75      0.87      0.80      3701
          26       0.82      0.66      0.73      3897
          27       0.87      0.88      0.88      4002

    accuracy                           0.89    137464
   macro avg       0.87      0.82      0.82    137464
weighted avg       0.89      0.89      0.88    137464

Training on Batches: [1], Testing on: [3].

=== Batch [3] ===
Train: (192220, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
Test: (130788, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27]
ANXA11_WT_Untreated: 4777
CLTC_WT_Untreated: 5749
Calreticulin_WT_Untreated: 6639
DAPI_WT_Untreated: 63181
DCP1A_WT_Untreated: 5177
FMRP_WT_Untreated: 2678
FUS_WT_Untreated: 6586
G3BP1_WT_Untreated: 2689
GM130_WT_Untreated: 6376
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LAMP1_WT_Untreated: 1561
LSM14A_WT_Untreated: 6008
NCL_WT_Untreated: 6573
NEMO_WT_Untreated: 5115
NONO_WT_Untreated: 4542
PEX14_WT_Untreated: 4830
PML_WT_Untreated: 4895
PSD95_WT_Untreated: 5739
PURA_WT_Untreated: 2650
Phalloidin_WT_Untreated: 5431
SNCA_WT_Untreated: 2368
SON_WT_Untreated: 4987
SQSTM1_WT_Untreated: 4527
TDP43_WT_Untreated: 4683
TOMM20_WT_Untreated: 4400
Tubulin_WT_Untreated: 4873
mitotracker_WT_Untreated: 4364

Pretrained Model¶

In [5]:
pretrained_dataset_config = {
    "path_to_embeddings": "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model",
    "multiplexed": False,
    "config_fmt": "newNeuronsD8FigureConfig_UMAP1_B{batch}",
    "config_dir": "manuscript/manuscript_figures_data_config",
}
In [ ]:
## Baseline
run_baseline_model(
    dataset_config= pretrained_dataset_config,
    batches=[1, 2, 3, 7, 8, 9],
    balance=False,
    norm=False,
    choose_features=False,
    top_k=100,
    label_map=None,
    classifier_class=cuMLLogisticRegression,
    classifier_kwargs={},
    train_specific_batches = [1],        
    results_csv="classification_results-indi.csv"
)
In [16]:
for clf_class, clf_kwargs in additional_classifiers:
    print(f"\n=== Running {clf_class.__name__} ===")
    run_baseline_model(
        dataset_config=pretrained_dataset_config,
        batches=[1, 2, 3, 7, 8, 9],
        classifier_class=clf_class,
        classifier_kwargs=clf_kwargs,
        train_specific_batches=[1],
        results_csv="classification_results-indi.csv"
    )
2025-08-21 14:28:16 INFO: [load_embeddings] multiplex=False
2025-08-21 14:28:16 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 14:28:16 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-21 14:28:16 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
=== Running ExtraTreesClassifier ===
Loading all batches...
2025-08-21 14:28:26 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 14:28:28 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 14:28:29 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 14:28:30 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-21 14:28:30 INFO: [load_embeddings] labels shape: (196119,)
2025-08-21 14:28:30 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-21 14:28:30 INFO: [load_embeddings] paths shape: (196119,)
2025-08-21 14:28:30 INFO: [load_embeddings] multiplex=False
2025-08-21 14:28:30 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 14:28:30 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-21 14:28:30 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-08-21 14:28:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 14:28:37 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 14:28:38 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 14:28:39 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-21 14:28:39 INFO: [load_embeddings] labels shape: (141079,)
2025-08-21 14:28:39 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-21 14:28:39 INFO: [load_embeddings] paths shape: (141079,)
2025-08-21 14:28:39 INFO: [load_embeddings] multiplex=False
2025-08-21 14:28:39 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 14:28:39 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-21 14:28:39 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-08-21 14:28:46 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 14:28:47 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 14:28:48 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 14:28:48 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-21 14:28:48 INFO: [load_embeddings] labels shape: (134336,)
2025-08-21 14:28:48 INFO: [load_embeddings] example label: LAMP1_WT_Untreated
2025-08-21 14:28:48 INFO: [load_embeddings] paths shape: (134336,)
2025-08-21 14:28:49 INFO: [load_embeddings] multiplex=False
2025-08-21 14:28:49 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 14:28:49 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-21 14:28:49 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-08-21 14:28:57 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 14:28:59 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 14:29:00 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 14:29:01 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-21 14:29:01 INFO: [load_embeddings] labels shape: (189079,)
2025-08-21 14:29:01 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-21 14:29:01 INFO: [load_embeddings] paths shape: (189079,)
2025-08-21 14:29:01 INFO: [load_embeddings] multiplex=False
2025-08-21 14:29:01 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 14:29:01 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-21 14:29:01 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-08-21 14:29:10 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 14:29:12 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 14:29:14 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 14:29:15 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-21 14:29:15 INFO: [load_embeddings] labels shape: (169304,)
2025-08-21 14:29:15 INFO: [load_embeddings] example label: G3BP1_WT_Untreated
2025-08-21 14:29:15 INFO: [load_embeddings] paths shape: (169304,)
2025-08-21 14:29:15 INFO: [load_embeddings] multiplex=False
2025-08-21 14:29:15 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 14:29:15 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-21 14:29:15 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-08-21 14:29:24 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[16], line 3
      1 for clf_class, clf_kwargs in additional_classifiers1:
      2     print(f"\n=== Running {clf_class.__name__} ===")
----> 3     run_baseline_model(
      4         dataset_config=pretrained_dataset_config,
      5         batches=[1, 2, 3, 7, 8, 9],
      6         classifier_class=clf_class,
      7         classifier_kwargs=clf_kwargs,
      8         train_specific_batches=[1],
      9         results_csv="classification_results-indi.csv"
     10     )

File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/classifier/utils.py:292, in run_baseline_model(dataset_config, batches, balance, norm, choose_features, top_k, apply_pca, pca_components, label_map, classifier_class, classifier_kwargs, test_specific_batches, train_specific_batches, return_proba, calculate_auc, results_csv)
    272     header = not os.path.exists(results_csv)
    273     pd.DataFrame([row]).to_csv(results_csv, mode="a", header=header, index=False)
    275 def run_baseline_model(
    276     dataset_config,                # dict with paths/loading settings for embeddings
    277     batches=[1, 2, 3, 7, 8, 9,],   # list of batch IDs to include in the experiment
    278     balance=False,                 # whether to balance class distributions during training
    279     norm=False,                    # whether to normalize features before training
    280     choose_features=False,         # whether to select top features (e.g., univariate ranking)
    281     top_k=100,                     # number of features to keep if choose_features=True
    282     apply_pca=False,               # whether to reduce dimensionality with PCA
    283     pca_components=50,             # number of PCA components if apply_pca=True
    284     label_map=None,                # optional mapping to merge/remap labels, e.g. {"WT":0,"KO":1}
    285     classifier_class=cuMLLogisticRegression, # classifier class to use (any sklearn/cuML-compatible estimator)
    286     classifier_kwargs=dict(),      # extra arguments for the classifier constructor (e.g. {"max_depth":10})
    287     test_specific_batches=None,    # int or list: which batches to use as test folds; None = default LOOCV
    288     train_specific_batches=None,   # int or list: which batches to use for training; None = complement of test
    289     return_proba=False,            # if True, return DataFrame of predicted probabilities along with metrics
    290     calculate_auc=False,            # if True, compute ROC AUC for the predictions
    291     results_csv=None
--> 292 ):
    293     accuracies = []
    294     accumulated_cm = None

File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/classifier/utils.py:128, in load_all_batches(batch_ids, dataset_config)
    125 config_data = load_config_file(config_path_data, 'data')
    126 config_data.OUTPUTS_FOLDER = path_to_embeddings
--> 128 X, y, _ = load_embeddings(path_to_embeddings, config_data)
    130 if multiplexed:
    131     analyzer = AnalyzerMultiplexMarkers(config_data, path_to_embeddings)

File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/embeddings/embeddings_utils.py:258, in load_embeddings(model_output_folder, config_data, sample_fraction, multiplex)
    256 paths = np.concatenate(paths)
    257 labels = edit_labels_by_config(labels, config_data, multiplex)
--> 258 filtered_labels, filtered_embeddings, filtered_paths = __filter(labels, embeddings, paths, config_data, multiplex)
    260 if sample_fraction < 1.0:
    261     logging.info(f"[load_embeddings] Sampling {sample_fraction*100:.1f}% of each label group (from {len(filtered_labels)} total labels)")

File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/embeddings/embeddings_utils.py:377, in __filter(labels, embeddings, paths, config_data, multiplex)
    375 if markers_to_exclude and (not multiplex):
    376     logging.info(f"[embeddings_utils._filter] markers_to_exclude = {markers_to_exclude}")
--> 377     labels, embeddings, paths = __filter_by_label_part(labels, embeddings, paths, markers_to_exclude,
    378                               get_markers_from_labels, include=False)
    379 if markers and (not multiplex):
    380     logging.info(f"[embeddings_utils._filter] markers = {markers}")

File /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/embeddings/embeddings_utils.py:423, in __filter_by_label_part(labels, embeddings, paths, filter_on, get_parts_from_labels, config_data, include)
    421     indices_to_keep = np.where(~np.isin(parts_of_labels, filter_on))[0]
    422 labels = labels[indices_to_keep]
--> 423 embeddings = embeddings[indices_to_keep]
    424 paths = paths[indices_to_keep]
    425 return labels, embeddings, paths

KeyboardInterrupt: 
In [ ]:
 
In [6]:
 
In [9]:
Cs        = [1.0, 3.0, 10.0, 30.0]     # weaker regularization for 200 features
balances  = [False]#[False, True]        # uses your run_baseline_model's 'balance'
norms     = [False, True]       # uses your run_baseline_model's 'norm'

best = None

for dataset in [dataset_config]:#Cytoself_dataset_config, pretrained_dataset_config]:
    print(dataset)
    for C, bal, norm in itertools.product(Cs, balances, norms):
        print(C,bal,norm)
        clf_class = cuLogisticRegression
        clf_kwargs = dict(
            C=C,
        )
        try:
            res = run_baseline_model(
                dataset_config=dataset,
                batches=[1, 2, 3, 7, 8, 9],
                classifier_class=clf_class,
                classifier_kwargs=clf_kwargs,
                train_specific_batches=[1],
                results_csv=f"classifier_test_linear_params.csv",
                norm=norm,
                balance=bal
            )

            # Pick your metric (prefer macro F1 if available)
            score = res.get("f1_macro", res.get("accuracy"))
            if score is not None and (best is None or score > best[0]):
                best = (score, {"C": C, "balance": bal, "norm": norm})
        except:
            print('failed')

    print("Best config:", best)
2025-08-21 17:51:29 INFO: [load_embeddings] multiplex=False
2025-08-21 17:51:29 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:51:29 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-21 17:51:29 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
{'path_to_embeddings': '/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen', 'multiplexed': False, 'config_fmt': 'newNeuronsD8FigureConfig_UMAP1_B{batch}', 'config_dir': 'manuscript/manuscript_figures_data_config'}
30.0 False False
Loading all batches...
2025-08-21 17:51:38 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:51:40 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:51:42 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:51:42 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-21 17:51:42 INFO: [load_embeddings] labels shape: (196119,)
2025-08-21 17:51:42 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-21 17:51:42 INFO: [load_embeddings] paths shape: (196119,)
2025-08-21 17:51:42 INFO: [load_embeddings] multiplex=False
2025-08-21 17:51:42 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:51:42 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-21 17:51:42 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:51:48 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:51:50 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:51:51 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:51:51 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-21 17:51:51 INFO: [load_embeddings] labels shape: (141079,)
2025-08-21 17:51:51 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-21 17:51:51 INFO: [load_embeddings] paths shape: (141079,)
2025-08-21 17:51:51 INFO: [load_embeddings] multiplex=False
2025-08-21 17:51:51 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:51:51 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-21 17:51:51 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:51:58 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:51:59 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:52:00 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:52:01 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-21 17:52:01 INFO: [load_embeddings] labels shape: (134336,)
2025-08-21 17:52:01 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-21 17:52:01 INFO: [load_embeddings] paths shape: (134336,)
2025-08-21 17:52:01 INFO: [load_embeddings] multiplex=False
2025-08-21 17:52:01 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:52:01 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-21 17:52:01 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:52:09 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:52:11 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:52:13 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:52:13 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-21 17:52:13 INFO: [load_embeddings] labels shape: (189079,)
2025-08-21 17:52:13 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-21 17:52:13 INFO: [load_embeddings] paths shape: (189079,)
2025-08-21 17:52:13 INFO: [load_embeddings] multiplex=False
2025-08-21 17:52:13 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:52:13 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-21 17:52:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:52:23 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:52:25 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:52:27 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:52:27 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-21 17:52:27 INFO: [load_embeddings] labels shape: (169304,)
2025-08-21 17:52:27 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-21 17:52:27 INFO: [load_embeddings] paths shape: (169304,)
2025-08-21 17:52:27 INFO: [load_embeddings] multiplex=False
2025-08-21 17:52:27 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:52:27 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-21 17:52:27 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:52:36 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:52:38 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:52:40 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:52:40 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-21 17:52:40 INFO: [load_embeddings] labels shape: (196652,)
2025-08-21 17:52:40 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-21 17:52:40 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3, 7, 8, 9].

=== Batch [2, 3, 7, 8, 9] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (830450, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
              precision    recall  f1-score   support

           0       0.96      0.89      0.92     19584
           1       0.91      0.91      0.91     24225
           2       0.73      0.93      0.82     25894
           3       1.00      1.00      1.00    271498
           4       0.93      0.93      0.93     25887
           5       0.92      0.97      0.94      8704
           6       0.76      0.90      0.83     19636
           7       0.82      0.98      0.89      9447
           8       0.90      0.74      0.81     25126
           9       0.91      0.77      0.84     24526
          10       0.92      0.96      0.94     20974
          11       0.66      0.90      0.76      9717
          12       0.95      0.83      0.88     25124
          13       0.98      0.75      0.85     25750
          14       0.96      0.99      0.97     24282
          15       0.90      0.82      0.86     20846
          16       0.95      0.96      0.96     20145
          17       0.77      0.99      0.87     19937
          18       0.92      0.91      0.92     21489
          19       0.85      0.91      0.88      8626
          20       0.87      0.95      0.90     22312
          21       0.75      0.85      0.79     11037
          22       0.96      1.00      0.98     22340
          23       0.62      0.65      0.63     19138
          24       0.82      0.88      0.85     19716
          25       0.94      0.96      0.95     16541
          26       0.90      0.90      0.90     21633
          27       0.77      0.69      0.73     24440
          28       0.87      0.50      0.63     21876

    accuracy                           0.91    830450
   macro avg       0.87      0.88      0.87    830450
weighted avg       0.91      0.91      0.91    830450


=== Overall Accuracy ===
0.9088193148293094 [0.9088193148293094]
2025-08-21 17:52:54 INFO: [load_embeddings] multiplex=False
2025-08-21 17:52:54 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:52:54 INFO: [load_embeddings] input_folders = ['batch1']
2025-08-21 17:52:54 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1
      ANXA11_WT_Untreated  0.996503     0.891493     0.999039 0.957287 0.997384 0.923219
        CLTC_WT_Untreated  0.994722     0.907410     0.997346 0.911284 0.997218 0.909343
Calreticulin_WT_Untreated  0.987077     0.934773     0.988760 0.728014 0.997881 0.818538
        DAPI_WT_Untreated  0.999324     0.999963     0.999014 0.997975 0.999982 0.998968
       DCP1A_WT_Untreated  0.995714     0.933287     0.997723 0.929517 0.997853 0.931398
        FMRP_WT_Untreated  0.998801     0.967142     0.999136 0.922217 0.999652 0.944145
         FUS_WT_Untreated  0.991017     0.901966     0.993174 0.761895 0.997615 0.826034
       G3BP1_WT_Untreated  0.997327     0.977771     0.997552 0.821286 0.999744 0.892723
       GM130_WT_Untreated  0.989556     0.735453     0.997484 0.901195 0.991793 0.809932
     HNRNPA1_WT_Untreated  0.991071     0.771141     0.997764 0.913010 0.993068 0.836100
       KIF5A_WT_Untreated  0.997063     0.964861     0.997897 0.922421 0.999088 0.943164
       LAMP1_WT_Untreated  0.993506     0.902130     0.994588 0.663689 0.998836 0.764755
      LSM14A_WT_Untreated  0.993414     0.827973     0.998576 0.947742 0.994654 0.883819
         NCL_WT_Untreated  0.991603     0.747379     0.999418 0.976259 0.991976 0.846623
        NEMO_WT_Untreated  0.998436     0.992422     0.998617 0.955777 0.999771 0.973755
        NONO_WT_Untreated  0.993124     0.816272     0.997678 0.900508 0.995281 0.856323
       PEX14_WT_Untreated  0.997941     0.963316     0.998802 0.952348 0.999088 0.957801
         PML_WT_Untreated  0.992610     0.993730     0.992582 0.767193 0.999845 0.865890
       PSD95_WT_Untreated  0.995730     0.913956     0.997902 0.920467 0.997715 0.917200
        PURA_WT_Untreated  0.997476     0.914677     0.998345 0.852973 0.999104 0.882748
  Phalloidin_WT_Untreated  0.994621     0.945097     0.995988 0.866743 0.998480 0.904226
        SNCA_WT_Untreated  0.994119     0.846697     0.996105 0.745394 0.997931 0.792823
         SON_WT_Untreated  0.998885     0.997001     0.998937 0.962865 0.999917 0.979636
      SQSTM1_WT_Untreated  0.982615     0.652576     0.990401 0.615920 0.991793 0.633718
       TDP43_WT_Untreated  0.992738     0.884916     0.995360 0.822622 0.997196 0.852633
        TIA1_WT_Untreated  0.997965     0.958346     0.998770 0.940604 0.999153 0.949392
      TOMM20_WT_Untreated  0.995032     0.904313     0.997458 0.904898 0.997441 0.904606
     Tubulin_WT_Untreated  0.984923     0.689484     0.993881 0.773585 0.990615 0.729118
 mitotracker_WT_Untreated  0.984725     0.496343     0.997938 0.866906 0.986529 0.631261
            Macro Average  0.993712     0.876962     0.996767 0.869055 0.996780 0.867582
30.0 False True
Loading all batches...
2025-08-21 17:52:59 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:53:01 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:53:02 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:53:03 INFO: [load_embeddings] embeddings shape: (196119, 192)
2025-08-21 17:53:03 INFO: [load_embeddings] labels shape: (196119,)
2025-08-21 17:53:03 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-21 17:53:03 INFO: [load_embeddings] paths shape: (196119,)
2025-08-21 17:53:03 INFO: [load_embeddings] multiplex=False
2025-08-21 17:53:03 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:53:03 INFO: [load_embeddings] input_folders = ['batch2']
2025-08-21 17:53:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:53:07 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:53:08 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:53:09 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:53:09 INFO: [load_embeddings] embeddings shape: (141079, 192)
2025-08-21 17:53:09 INFO: [load_embeddings] labels shape: (141079,)
2025-08-21 17:53:09 INFO: [load_embeddings] example label: HNRNPA1_WT_Untreated
2025-08-21 17:53:09 INFO: [load_embeddings] paths shape: (141079,)
2025-08-21 17:53:10 INFO: [load_embeddings] multiplex=False
2025-08-21 17:53:10 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:53:10 INFO: [load_embeddings] input_folders = ['batch3']
2025-08-21 17:53:10 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:53:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:53:15 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:53:16 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:53:16 INFO: [load_embeddings] embeddings shape: (134336, 192)
2025-08-21 17:53:16 INFO: [load_embeddings] labels shape: (134336,)
2025-08-21 17:53:16 INFO: [load_embeddings] example label: TOMM20_WT_Untreated
2025-08-21 17:53:16 INFO: [load_embeddings] paths shape: (134336,)
2025-08-21 17:53:16 INFO: [load_embeddings] multiplex=False
2025-08-21 17:53:16 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:53:16 INFO: [load_embeddings] input_folders = ['batch7']
2025-08-21 17:53:16 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:53:21 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:53:23 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:53:25 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:53:25 INFO: [load_embeddings] embeddings shape: (189079, 192)
2025-08-21 17:53:25 INFO: [load_embeddings] labels shape: (189079,)
2025-08-21 17:53:25 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-08-21 17:53:25 INFO: [load_embeddings] paths shape: (189079,)
2025-08-21 17:53:25 INFO: [load_embeddings] multiplex=False
2025-08-21 17:53:25 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:53:25 INFO: [load_embeddings] input_folders = ['batch8']
2025-08-21 17:53:25 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:53:31 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:53:33 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:53:35 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:53:35 INFO: [load_embeddings] embeddings shape: (169304, 192)
2025-08-21 17:53:35 INFO: [load_embeddings] labels shape: (169304,)
2025-08-21 17:53:35 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-08-21 17:53:35 INFO: [load_embeddings] paths shape: (169304,)
2025-08-21 17:53:35 INFO: [load_embeddings] multiplex=False
2025-08-21 17:53:35 INFO: [load_embeddings] experiment_type = neuronsDay8_new
2025-08-21 17:53:35 INFO: [load_embeddings] input_folders = ['batch9']
2025-08-21 17:53:35 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-08-21 17:53:40 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-08-21 17:53:43 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-08-21 17:53:44 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-08-21 17:53:45 INFO: [load_embeddings] embeddings shape: (196652, 192)
2025-08-21 17:53:45 INFO: [load_embeddings] labels shape: (196652,)
2025-08-21 17:53:45 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-08-21 17:53:45 INFO: [load_embeddings] paths shape: (196652,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3, 7, 8, 9].

=== Batch [2, 3, 7, 8, 9] ===
Train: (196119, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
Test: (830450, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28]
DAPI_WT_Untreated: 63181
PEX14_WT_Untreated: 4830
Calreticulin_WT_Untreated: 6639
TDP43_WT_Untreated: 4683
TIA1_WT_Untreated: 3899
TOMM20_WT_Untreated: 4400
HNRNPA1_WT_Untreated: 6023
KIF5A_WT_Untreated: 4799
LSM14A_WT_Untreated: 6008
SON_WT_Untreated: 4987
CLTC_WT_Untreated: 5749
GM130_WT_Untreated: 6376
NEMO_WT_Untreated: 5115
Tubulin_WT_Untreated: 4873
PURA_WT_Untreated: 2650
FMRP_WT_Untreated: 2678
DCP1A_WT_Untreated: 5177
NONO_WT_Untreated: 4542
PML_WT_Untreated: 4895
mitotracker_WT_Untreated: 4364
SQSTM1_WT_Untreated: 4527
ANXA11_WT_Untreated: 4777
Phalloidin_WT_Untreated: 5431
G3BP1_WT_Untreated: 2689
PSD95_WT_Untreated: 5739
FUS_WT_Untreated: 6586
SNCA_WT_Untreated: 2368
NCL_WT_Untreated: 6573
LAMP1_WT_Untreated: 1561
[W] [17:54:40.299559] L-BFGS: max iterations reached
[W] [17:54:40.301984] Maximum iterations reached before solver is converged. To increase model accuracy you can increase the number of iterations (max_iter) or improve the scaling of the input data.
              precision    recall  f1-score   support

           0       0.96      0.89      0.93     19584
           1       0.93      0.91      0.92     24225
           2       0.73      0.93      0.81     25894
           3       1.00      1.00      1.00    271498
           4       0.90      0.92      0.91     25887
           5       0.93      0.96      0.94      8704
           6       0.78      0.87      0.82     19636
           7       0.81      0.97      0.88      9447
           8       0.93      0.78      0.85     25126
           9       0.88      0.80      0.84     24526
          10       0.93      0.96      0.94     20974
          11       0.69      0.89      0.78      9717
          12       0.91      0.90      0.91     25124
          13       0.99      0.75      0.85     25750
          14       0.95      0.99      0.97     24282
          15       0.89      0.84      0.86     20846
          16       0.95      0.96      0.95     20145
          17       0.81      0.99      0.89     19937
          18       0.91      0.92      0.91     21489
          19       0.81      0.91      0.86      8626
          20       0.86      0.95      0.90     22312
          21       0.78      0.81      0.80     11037
          22       0.95      1.00      0.98     22340
          23       0.62      0.66      0.64     19138
          24       0.84      0.86      0.85     19716
          25       0.95      0.96      0.95     16541
          26       0.91      0.89      0.90     21633
          27       0.77      0.70      0.74     24440
          28       0.88      0.49      0.63     21876

    accuracy                           0.91    830450
   macro avg       0.87      0.88      0.87    830450
weighted avg       0.91      0.91      0.91    830450


=== Overall Accuracy ===
0.9108374977421879 [0.9108374977421879]
=== Evaluation Metrics ===
                    Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1
      ANXA11_WT_Untreated  0.996666     0.894557     0.999132 0.961368 0.997458 0.926760
        CLTC_WT_Untreated  0.995160     0.905428     0.997857 0.926972 0.997160 0.916073
Calreticulin_WT_Untreated  0.986734     0.925504     0.988704 0.725048 0.997581 0.813103
        DAPI_WT_Untreated  0.999557     0.999919     0.999381 0.998727 0.999961 0.999323
       DCP1A_WT_Untreated  0.994352     0.919033     0.996776 0.901687 0.997393 0.910277
        FMRP_WT_Untreated  0.998745     0.957031     0.999187 0.925761 0.999545 0.941137
         FUS_WT_Untreated  0.991137     0.868660     0.994103 0.781070 0.996811 0.822539
       G3BP1_WT_Untreated  0.997012     0.971419     0.997307 0.805848 0.999670 0.880922
       GM130_WT_Untreated  0.991481     0.775372     0.998223 0.931574 0.993028 0.846326
     HNRNPA1_WT_Untreated  0.991021     0.800294     0.996825 0.884662 0.993940 0.840366
       KIF5A_WT_Untreated  0.996994     0.956708     0.998038 0.926665 0.998877 0.941447
       LAMP1_WT_Untreated  0.994056     0.893280     0.995249 0.690039 0.998732 0.778615
      LSM14A_WT_Untreated  0.994396     0.901091     0.997307 0.912569 0.996916 0.906793
         NCL_WT_Untreated  0.991968     0.750175     0.999705 0.987880 0.992067 0.852772
        NEMO_WT_Untreated  0.998254     0.989745     0.998510 0.952405 0.999691 0.970717
        NONO_WT_Untreated  0.993359     0.844527     0.997191 0.885608 0.996002 0.864579
       PEX14_WT_Untreated  0.997700     0.955622     0.998746 0.949869 0.998897 0.952737
         PML_WT_Untreated  0.994209     0.992476     0.994252 0.809417 0.999814 0.891648
       PSD95_WT_Untreated  0.995535     0.918516     0.997581 0.909795 0.997835 0.914135
        PURA_WT_Untreated  0.996902     0.910735     0.997806 0.813335 0.999062 0.859284
  Phalloidin_WT_Untreated  0.994610     0.947293     0.995917 0.864953 0.998541 0.904253
        SNCA_WT_Untreated  0.994443     0.811543     0.996906 0.779412 0.997460 0.795153
         SON_WT_Untreated  0.998647     0.997762     0.998671 0.954032 0.999938 0.975407
      SQSTM1_WT_Untreated  0.982641     0.659473     0.990264 0.615058 0.991954 0.636492
       TDP43_WT_Untreated  0.992904     0.860672     0.996120 0.843599 0.996610 0.852050
        TIA1_WT_Untreated  0.998071     0.955142     0.998943 0.948376 0.999088 0.951747
      TOMM20_WT_Untreated  0.995038     0.894421     0.997729 0.913292 0.997178 0.903758
     Tubulin_WT_Untreated  0.985200     0.702537     0.993771 0.773737 0.991005 0.736420
 mitotracker_WT_Untreated  0.984884     0.492915     0.998194 0.880748 0.986442 0.632082
            Macro Average  0.993851     0.877650     0.996841 0.870810 0.996850 0.869549
Best config: None
In [ ]:
 
In [ ]:
 
In [ ]:
 

Examine Features¶

In [2]:
X_train, y_train = load_batches([1])

le = LabelEncoder()
y_encoded = le.fit_transform(y_train)

f_scores, p_values = f_classif(X_train, y_encoded)
In [4]:
top_n = 100
top_idx = np.argsort(f_scores)[-top_n:]

plt.figure(figsize=(10, 5))
plt.bar(range(top_n), f_scores[top_idx])
plt.xticks(range(top_n), top_idx, rotation=45)
plt.ylabel("F-score")
plt.xlabel("Embedding dimension")
plt.title("Top correlated embedding features with labels")
plt.tight_layout()
plt.show()
In [8]:
def get_top_features_for_batch(batch, top_k=200):
    X, y = load_batches([batch])
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    f_scores, _ = f_classif(X, y_encoded)
    top_features = np.argsort(f_scores)[-top_k:]
    return set(top_features)

batches = [1, 2, 3, 7, 8, 9,]
top_k = 100

# Get top features per batch
batch_feature_map = {batch: get_top_features_for_batch(batch, top_k) for batch in batches}

# Create overlap matrix
overlap_matrix = pd.DataFrame(index=batches, columns=batches, dtype=int)
for b1, b2 in combinations(batches, 2):
    overlap = len(batch_feature_map[b1] & batch_feature_map[b2])
    overlap_matrix.loc[b1, b2] = overlap
    overlap_matrix.loc[b2, b1] = overlap
for b in batches:
    overlap_matrix.loc[b, b] = top_k

# Plot
plt.figure(figsize=(8, 6))
sns.heatmap(overlap_matrix.astype(int), annot=True, fmt='d', cmap='Blues')
plt.title(f'Overlap of Top {top_k} Features Across Batches')
plt.xlabel("Batch")
plt.ylabel("Batch")
plt.tight_layout()
plt.show()
In [11]:
feat1, feat2 = top_idx[:2]
X_vis = X_train[:, [feat1, feat2]]

# Convert encoded labels back to original strings
labels_str = le.inverse_transform(y_encoded)

# Create a scatter plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_vis[idx, 0], X_vis[idx, 1], label=label, alpha=0.7)

plt.xlabel(f'Feature {feat1}')
plt.ylabel(f'Feature {feat2}')
plt.title('Top 2 Embeddings by Label')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
In [6]:
from sklearn.decomposition import PCA
import umap
In [14]:
# 1. Select top 100 features from X_train
X_top = X_train[:, top_idx[:100]]

# 2. Encode labels to color
labels_str = le.inverse_transform(y_encoded)

# --- Option A: PCA ---
use_pca = True
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)

# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)

# 4. Plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)

plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
In [15]:
# 1. Select top 100 features from X_train
X_top = X_train[:, top_idx[:100]]

# 2. Encode labels to color
labels_str = le.inverse_transform(y_encoded)

# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)

# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)

# 4. Plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)

plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
In [16]:
# 1. Select top 100 features from X_train
X_top = X_train[:, top_idx[:1000]]

# 2. Encode labels to color
labels_str = le.inverse_transform(y_encoded)

# --- Option A: PCA ---
use_pca = True
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)

# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)

# 4. Plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)

plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
In [17]:
# 1. Select top 100 features from X_train
X_top = X_train[:, top_idx[:1000]]

# 2. Encode labels to color
labels_str = le.inverse_transform(y_encoded)

# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)

# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)

# 4. Plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)

plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
In [18]:
# 1. Select top 100 features from X_train
X_top = X_train

# 2. Encode labels to color
labels_str = le.inverse_transform(y_encoded)

# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)

# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)

# 4. Plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)

plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
In [7]:
# 1. Select top 100 features from X_train
X_top = X_train[:, top_idx[:10]]

# 2. Encode labels to color
labels_str = le.inverse_transform(y_encoded)

# --- Option A: PCA ---
use_pca = False
if use_pca:
    reducer = PCA(n_components=2, random_state=42)
else:
    # --- Option B: UMAP ---
    reducer = umap.UMAP(n_components=2, random_state=42)

# 3. Reduce dimensions
X_embedded = reducer.fit_transform(X_top)

# 4. Plot
plt.figure(figsize=(8, 6))
for label in np.unique(labels_str):
    idx = labels_str == label
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=label, alpha=0.6)

plt.title("2D Projection of Top 100 Features using " + ("PCA" if use_pca else "UMAP"))
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
In [ ]: